Added support for openai api tts.

This commit is contained in:
2025-04-05 14:40:56 +00:00
parent b8e2e6e238
commit e8eb93ae1b
11 changed files with 2063 additions and 989 deletions
+323 -177
View File
@@ -23,7 +23,6 @@ export class BrowserTTSHandler extends TTSHandler {
this.available = false;
this.voices = [];
this.currentUtterance = null;
this.preloadCache = new Map();
// Add dependencies
this.dependencies = ['localization', 'persistence-manager'];
@@ -61,114 +60,110 @@ export class BrowserTTSHandler extends TTSHandler {
async initialize(progressCallback = null) {
try {
if (progressCallback) {
progressCallback(10, "Initializing Browser TTS Handler");
progressCallback(10, 'Initializing Browser TTS');
}
// Check if the browser supports speech synthesis
this.changeState('LOADING');
// Check for browser support
if (!window.speechSynthesis) {
console.error("Browser TTS: Speech synthesis not supported by browser");
console.warn('Browser TTS: Speech synthesis not available in this browser');
if (progressCallback) {
progressCallback(100, "Browser TTS unavailable");
progressCallback(100, 'Browser TTS not available');
}
this.changeState('ERROR');
return false;
}
if (progressCallback) {
progressCallback(30, "Loading voices");
progressCallback(30, 'Browser TTS supported');
}
// Check for required dependencies
const localization = this.getModule('localization');
const persistenceManager = this.getModule('persistence-manager');
if (!localization) {
console.error('Browser TTS: Required dependency \'localization\' not found');
this.changeState('ERROR');
return false;
}
if (!persistenceManager) {
console.error('Browser TTS: Required dependency \'persistence-manager\' not found');
this.changeState('ERROR');
return false;
}
if (progressCallback) {
progressCallback(40, 'Browser TTS dependencies loaded');
}
// Load voices - but don't fail initialization if no voices are found yet
// The browser may provide voices later
try {
// Load available voices
await this.loadVoices();
console.log(`Browser TTS: Loaded ${this.voices.length} voices initially`);
} catch (error) {
console.warn('Browser TTS: Error loading voices initially:', error);
// Don't fail initialization - voices may become available later
this.voices = [];
}
if (progressCallback) {
progressCallback(60, `Browser TTS loaded ${this.voices.length} voices`);
}
// Set speech options from preferences
try {
const rate = persistenceManager.getPreference('tts', 'speed', 1.0);
const pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
const volume = persistenceManager.getPreference('tts', 'volume', 1.0);
this.options.rate = parseFloat(rate);
this.options.pitch = parseFloat(pitch);
this.options.volume = parseFloat(volume);
// Log all available voices for debugging
console.log('Browser TTS: Available voices:', this.voices.map(v => `${v.name} (${v.lang})`));
// Set voice based on locale
const locale = localization.getLocale();
console.log(`Browser TTS: Setting voice for locale: ${locale}`);
const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice');
await this.selectVoiceForLocale(locale, preferredVoice);
if (progressCallback) {
progressCallback(70, "Setting up voice");
}
// Get localization module
const localization = this.getModule('localization');
const persistenceManager = this.getModule('persistence-manager');
// Get current locale and preferred voice
let currentLocale = 'en-us';
let preferredVoice = '';
if (localization) {
currentLocale = localization.getLocale();
} else {
console.error("Browser TTS: Localization module not found");
}
if (persistenceManager) {
preferredVoice = persistenceManager.getPreference('tts', 'voice', '');
} else {
console.error("Browser TTS: Persistence Manager module not found");
}
// Set voice based on locale and preferences
await this.selectVoiceForLocale(currentLocale, preferredVoice);
// Check if we have a voice set
if (this.voiceOptions.voice) {
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, "Browser TTS Handler ready");
}
return true;
} else {
// Try one more time with a delay
console.log("Browser TTS: No voice set, trying again after delay");
if (progressCallback) {
progressCallback(80, "Retrying voice loading");
}
// Wait a bit and try again
return new Promise(resolve => {
setTimeout(async () => {
await this.loadVoices();
await this.selectVoiceForLocale(currentLocale, preferredVoice);
if (this.voiceOptions.voice) {
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, "Browser TTS Handler ready");
}
resolve(true);
} else {
console.error("Browser TTS: Failed to set voice after retry");
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
}
resolve(false);
}
}, 1000);
});
progressCallback(80, 'Browser TTS voice selected');
}
} catch (error) {
console.error("Browser TTS: Error loading voices:", error);
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
}
return false;
console.warn('Browser TTS: Error setting speech options:', error);
// Don't fail initialization due to voice selection issues
}
} catch (error) {
console.error("Browser TTS: Initialization error:", error);
// If voices were loaded but no voice is selected, try to set a default
if (this.voices.length > 0 && !this.voiceOptions.voice) {
console.warn('Browser TTS: No voice selected after initialization, trying fallback');
this.voiceOptions.voice = this.voices[0];
}
// Always mark as available if speech synthesis is supported, regardless of voice selection
// This ensures the Browser TTS option always appears in the dropdown
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
progressCallback(100, 'Browser TTS initialized');
}
this.changeState('FINISHED');
return true;
} catch (error) {
console.error('Browser TTS: Initialization error:', error);
if (progressCallback) {
progressCallback(100, `Browser TTS initialization failed - ${error.message}`);
}
this.changeState('ERROR');
return false;
}
}
@@ -180,14 +175,8 @@ export class BrowserTTSHandler extends TTSHandler {
await this.loadVoices();
const localization = this.getModule('localization');
const persistenceManager = this.getModule('persistence-manager');
let currentLocale = 'en-us';
let preferredVoice = '';
if (localization) {
currentLocale = localization.getLocale();
}
if (persistenceManager) {
preferredVoice = persistenceManager.getPreference('tts', 'voice', '');
}
let currentLocale = localization ? localization.getLocale() : 'en-us';
let preferredVoice = persistenceManager ? persistenceManager.getPreference('tts', 'voice', '') : '';
await this.selectVoiceForLocale(currentLocale, preferredVoice);
}
@@ -197,19 +186,45 @@ export class BrowserTTSHandler extends TTSHandler {
*/
async loadVoices() {
return new Promise(resolve => {
// Get available voices
const getVoices = () => {
// Helper function to filter and sort voices
const processVoices = () => {
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
// Log all available voices for debugging
console.log('Browser TTS: Raw loaded voices:',
this.voices.map(v => `${v.name} (${v.lang})`));
// Ensure we have at least one voice
if (this.voices.length === 0) {
console.warn('Browser TTS: No voices available from speech synthesis');
resolve();
return;
}
// Sort voices to prioritize English voices first
this.voices.sort((a, b) => {
// Put English voices first
const aIsEnglish = a.lang.toLowerCase().startsWith('en');
const bIsEnglish = b.lang.toLowerCase().startsWith('en');
if (aIsEnglish && !bIsEnglish) return -1;
if (!aIsEnglish && bIsEnglish) return 1;
// Then sort by language
return a.lang.localeCompare(b.lang);
});
console.log('Browser TTS: Sorted voices:',
this.voices.map(v => `${v.name} (${v.lang})`));
resolve();
};
// Some browsers need a timeout to get voices
const timeoutId = setTimeout(() => {
if (this.voices.length === 0) {
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices after timeout`);
resolve();
console.log('Browser TTS: Using timeout fallback to get voices');
processVoices();
}
}, 1000);
@@ -218,15 +233,14 @@ export class BrowserTTSHandler extends TTSHandler {
if (this.voices.length > 0) {
clearTimeout(timeoutId);
console.log(`Browser TTS: Loaded ${this.voices.length} voices immediately`);
resolve();
processVoices();
} else {
// If no voices are available yet, set up the onvoiceschanged event
speechSynthesis.onvoiceschanged = () => {
clearTimeout(timeoutId);
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices from event`);
console.log('Browser TTS: Voices changed event fired');
processVoices();
speechSynthesis.onvoiceschanged = null;
resolve();
};
}
});
@@ -239,8 +253,15 @@ export class BrowserTTSHandler extends TTSHandler {
* @returns {Promise<void>}
*/
async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
// Debug voice selection process
console.log(`Browser TTS: Selecting voice for locale ${locale}, preferred voice: ${preferredVoice || 'none'}`);
console.log(`Browser TTS: Available voices:`, this.voices.map(v => `${v.name} (${v.lang})`));
// Normalize locale for comparison
const normalizedLocale = locale.toLowerCase().split('-')[0];
const normalizedLocale = locale.toLowerCase();
const languageCode = normalizedLocale.split('-')[0]; // e.g., 'en' from 'en-us'
console.log(`Browser TTS: Normalized locale: ${normalizedLocale}, language code: ${languageCode}`);
// If we have a preferred voice, try to use it first
if (preferredVoice) {
@@ -256,35 +277,57 @@ export class BrowserTTSHandler extends TTSHandler {
}
}
// Find voices matching the locale
const localeVoices = this.voices.filter(voice => {
// Find voices exactly matching the locale (e.g., 'en-us')
const exactLocaleVoices = this.voices.filter(voice => {
const voiceLocale = voice.lang.toLowerCase();
return voiceLocale.startsWith(normalizedLocale) ||
voice.name.toLowerCase().includes(normalizedLocale);
return voiceLocale === normalizedLocale;
});
if (localeVoices.length > 0) {
console.log(`Browser TTS: Found ${exactLocaleVoices.length} exact locale matches for ${normalizedLocale}`);
if (exactLocaleVoices.length > 0) {
// Use the first matching voice
this.voiceOptions.voice = localeVoices[0];
console.log(`Browser TTS: Using ${normalizedLocale} voice: ${this.voiceOptions.voice.name}`);
this.voiceOptions.voice = exactLocaleVoices[0];
console.log(`Browser TTS: Using exact locale match for ${normalizedLocale}: ${this.voiceOptions.voice.name}`);
return;
}
// If no matching voice found, try to find any voice
if (this.voices.length > 0) {
// Look for a preferred language voice (English)
// Find voices matching the language code (e.g., 'en')
const languageVoices = this.voices.filter(voice => {
const voiceLocale = voice.lang.toLowerCase();
console.log(`Browser TTS: Comparing voice lang ${voiceLocale} with language code ${languageCode}`);
return voiceLocale.startsWith(languageCode) ||
(voiceLocale.length === 2 && languageCode.startsWith(voiceLocale));
});
console.log(`Browser TTS: Found ${languageVoices.length} language matches for ${languageCode}`);
if (languageVoices.length > 0) {
// Use the first matching voice
this.voiceOptions.voice = languageVoices[0];
console.log(`Browser TTS: Using language match for ${languageCode}: ${this.voiceOptions.voice.name}`);
return;
}
// If current language is not English and no matching voice found, try to find English voices
if (languageCode !== 'en') {
const englishVoices = this.voices.filter(voice =>
voice.lang.toLowerCase().startsWith('en')
);
console.log(`Browser TTS: Found ${englishVoices.length} English voices as fallback`);
if (englishVoices.length > 0) {
this.voiceOptions.voice = englishVoices[0];
console.log(`Browser TTS: No ${normalizedLocale} voice found, using English voice: ${this.voiceOptions.voice.name}`);
} else {
// Use the first available voice
this.voiceOptions.voice = this.voices[0];
console.log(`Browser TTS: No ${normalizedLocale} or English voice found, using: ${this.voiceOptions.voice.name}`);
console.log(`Browser TTS: No ${languageCode} voice found, using English voice: ${this.voiceOptions.voice.name}`);
return;
}
}
// As a last resort, use any available voice
if (this.voices.length > 0) {
this.voiceOptions.voice = this.voices[0];
console.log(`Browser TTS: No matching voice found, using first available voice: ${this.voiceOptions.voice.name}`);
} else {
console.log("Browser TTS: No voices available");
}
@@ -306,23 +349,26 @@ export class BrowserTTSHandler extends TTSHandler {
console.log(`Browser TTS: Preloading speech for: "${processedText.substring(0, 50)}${processedText.length > 50 ? '...' : ''}"`);
// Create utterance but don't speak it yet
const utterance = new SpeechSynthesisUtterance(processedText);
// Use MediaRecorder to capture audio output to WAV
const audioData = await this.synthesizeToWav(processedText);
if (!audioData) {
console.warn("Browser TTS: Failed to generate WAV audio");
return null;
}
// Set voice and options
utterance.voice = this.voiceOptions.voice;
utterance.rate = this.voiceOptions.rate;
utterance.pitch = this.voiceOptions.pitch;
utterance.volume = this.voiceOptions.volume;
utterance.lang = this.voiceOptions.voice.lang;
// Create audio element from blob
const audio = new Audio(URL.createObjectURL(audioData.blob));
// Store preloaded data
// Store preloaded data in the centralized TTSFactory cache
const preloadData = {
utterance,
audio: audio,
blob: audioData.blob,
text: processedText
};
this.preloadCache.set(text, preloadData);
// Use the TTSFactory's cache instead of a local cache
// this.preloadCache.set(text, preloadData);
// Instead, return the preloaded data to be stored in the TTSFactory's cache
return preloadData;
} catch (error) {
console.warn("Browser TTS: Error preloading speech:", error);
@@ -330,6 +376,84 @@ export class BrowserTTSHandler extends TTSHandler {
}
}
/**
* Convert speech synthesis to WAV format
* @param {string} text - Text to synthesize
* @returns {Promise<Object>} - Object with WAV blob
*/
synthesizeToWav(text) {
return new Promise((resolve, reject) => {
try {
// Create utterance
const utterance = new SpeechSynthesisUtterance(text);
// Set voice and options
utterance.voice = this.voiceOptions.voice;
utterance.rate = this.voiceOptions.rate;
utterance.pitch = this.voiceOptions.pitch;
utterance.volume = this.voiceOptions.volume;
utterance.lang = this.voiceOptions.voice.lang;
// Use Web Audio API to capture the speech output
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
const destination = audioContext.createMediaStreamDestination();
const mediaRecorder = new MediaRecorder(destination.stream);
const audioChunks = [];
// Capture the audio chunks
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0) {
audioChunks.push(event.data);
}
};
// When recording completes
mediaRecorder.onstop = () => {
// Create a WAV blob from the audio chunks
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
resolve({ blob: audioBlob });
};
// Set up speech synthesis events
utterance.onstart = () => {
console.log("Browser TTS: Started synthesizing audio to WAV");
mediaRecorder.start();
};
utterance.onend = () => {
console.log("Browser TTS: Finished synthesizing audio to WAV");
mediaRecorder.stop();
};
utterance.onerror = (error) => {
console.error("Browser TTS: Error synthesizing audio:", error);
reject(error);
};
// Start the speech synthesis
speechSynthesis.speak(utterance);
// If synthesis doesn't start within a reasonable timeout, reject the promise
const timeout = setTimeout(() => {
if (mediaRecorder.state === 'inactive') {
console.warn("Browser TTS: Synthesis to WAV timed out");
reject(new Error("Synthesis timed out"));
}
}, 5000);
// Clear timeout when synthesis starts
utterance.onstart = () => {
clearTimeout(timeout);
console.log("Browser TTS: Started synthesizing audio to WAV");
mediaRecorder.start();
};
} catch (error) {
console.error("Browser TTS: Error setting up WAV synthesis:", error);
reject(error);
}
});
}
/**
* Speak text using preloaded utterance
* @param {Object} preloadData - Preloaded speech data
@@ -337,7 +461,7 @@ export class BrowserTTSHandler extends TTSHandler {
* @returns {boolean} - Success status
*/
speakPreloaded(preloadData, callback = null) {
if (!this.available || !preloadData || !preloadData.utterance) {
if (!this.available || !preloadData || !preloadData.audio) {
if (callback) {
setTimeout(() => callback({ success: false, reason: 'no_preloaded_data' }), 0);
}
@@ -348,13 +472,13 @@ export class BrowserTTSHandler extends TTSHandler {
// Stop any current speech
this.stop();
const { utterance, text } = preloadData;
const { audio, text } = preloadData;
// Dispatch start event
this.dispatchEvent('tts:speak:start', { text });
// Set up event listeners
utterance.onend = () => {
audio.onended = () => {
this.currentUtterance = null;
// Dispatch end event
@@ -365,7 +489,7 @@ export class BrowserTTSHandler extends TTSHandler {
}
};
utterance.onerror = (error) => {
audio.onerror = (error) => {
this.currentUtterance = null;
// Dispatch error event
@@ -375,15 +499,15 @@ export class BrowserTTSHandler extends TTSHandler {
});
if (callback) {
callback({ success: false, reason: 'synthesis_error', error });
callback({ success: false, reason: 'audio_error', error });
}
};
// Store reference to current utterance
this.currentUtterance = utterance;
this.currentUtterance = audio;
// Speak the utterance
speechSynthesis.speak(utterance);
// Play the audio
audio.play();
return true;
} catch (error) {
@@ -396,7 +520,7 @@ export class BrowserTTSHandler extends TTSHandler {
});
if (callback) {
setTimeout(() => callback({ success: false, reason: 'synthesis_error', error }), 0);
setTimeout(() => callback({ success: false, reason: 'audio_error', error }), 0);
}
return false;
@@ -409,8 +533,8 @@ export class BrowserTTSHandler extends TTSHandler {
* @param {Function} callback - Callback for when speech completes
* @returns {boolean} - Success status
*/
speak(text, callback = null) {
if (!this.available || !this.voiceOptions.voice) {
async speak(text, callback = null) {
if (!this.available || !text) {
if (callback) {
setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
}
@@ -418,34 +542,27 @@ export class BrowserTTSHandler extends TTSHandler {
}
try {
// Stop any current speech
this.stop();
// Check if we have this in the preload cache
if (this.preloadCache.has(text)) {
const preloadData = this.preloadCache.get(text);
this.preloadCache.delete(text); // Remove from cache
return this.speakPreloaded(preloadData, callback);
}
// Process text for TTS
const processedText = this.preprocessText(text);
// Create utterance
const utterance = new SpeechSynthesisUtterance(processedText);
// Use MediaRecorder to capture audio output to WAV
const audioData = await this.synthesizeToWav(processedText);
if (!audioData) {
console.warn("Browser TTS: Failed to generate WAV audio");
if (callback) {
setTimeout(() => callback({ success: false, reason: 'synthesis_error' }), 0);
}
return false;
}
// Set voice and options
utterance.voice = this.voiceOptions.voice;
utterance.rate = this.voiceOptions.rate;
utterance.pitch = this.voiceOptions.pitch;
utterance.volume = this.voiceOptions.volume;
utterance.lang = this.voiceOptions.voice.lang;
// Create audio element from blob
const audio = new Audio(URL.createObjectURL(audioData.blob));
// Dispatch start event
this.dispatchEvent('tts:speak:start', { text: processedText });
// Set up event listeners
utterance.onend = () => {
audio.onended = () => {
this.currentUtterance = null;
// Dispatch end event
@@ -456,29 +573,29 @@ export class BrowserTTSHandler extends TTSHandler {
}
};
utterance.onerror = (error) => {
audio.onerror = (error) => {
this.currentUtterance = null;
// Dispatch error event
this.dispatchEvent('tts:speak:error', {
text: processedText,
error: error.error || 'Unknown error'
error: error.message || 'Unknown error'
});
if (callback) {
callback({ success: false, reason: 'synthesis_error', error });
callback({ success: false, reason: 'audio_error', error });
}
};
// Store reference to current utterance
this.currentUtterance = utterance;
// Store the current utterance for stopping later
this.currentUtterance = audio;
// Speak the utterance
speechSynthesis.speak(utterance);
// Play the audio
audio.play();
return true;
} catch (error) {
console.error("Browser TTS: Error generating speech:", error);
console.error("Browser TTS: Error speaking:", error);
// Dispatch error event
this.dispatchEvent('tts:speak:error', {
@@ -520,8 +637,12 @@ export class BrowserTTSHandler extends TTSHandler {
* Stop speaking
*/
stop() {
if (speechSynthesis) {
speechSynthesis.cancel();
if (this.currentUtterance) {
if (this.currentUtterance.stop) {
this.currentUtterance.stop();
} else if (this.currentUtterance.pause) {
this.currentUtterance.pause();
}
this.currentUtterance = null;
}
}
@@ -549,7 +670,7 @@ export class BrowserTTSHandler extends TTSHandler {
getVoices() {
// Get localization module for current locale
const localization = this.getModule('localization');
let currentLocale = localization ? localization.getLocale().toLowerCase() : 'en-us';
let currentLocale = localization ? localization.getLocale() : 'en-us';
// Create language code variations for matching
const languageCode = currentLocale.split('-')[0]; // e.g., 'en' from 'en-us'
@@ -563,14 +684,39 @@ export class BrowserTTSHandler extends TTSHandler {
(currentLocale.startsWith(voiceLang) && voiceLang.length === 2);
});
// If no matching voices found, fall back to all voices
const voicesToUse = filteredVoices.length > 0 ? filteredVoices : this.voices;
// If matching voices found, use them
if (filteredVoices.length > 0) {
return filteredVoices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
gender: this.inferVoiceGender(voice.name)
}));
}
return voicesToUse.map(voice => ({
// If no matching voices found and current locale isn't English,
// try to fallback to English voices
if (languageCode !== 'en') {
const englishVoices = this.voices.filter(voice => {
const voiceLang = voice.lang.toLowerCase();
return voiceLang.startsWith('en');
});
if (englishVoices.length > 0) {
return englishVoices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
gender: this.inferVoiceGender(voice.name)
}));
}
}
// As a last resort, return all voices
return this.voices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
// Add proper gender field if available, otherwise infer from name
gender: this.inferVoiceGender(voice.name)
}));
}