Added support for OpenAI API TTS.

This commit is contained in:
2025-04-05 14:40:56 +00:00
parent b8e2e6e238
commit e8eb93ae1b
11 changed files with 2063 additions and 989 deletions
+30
View File
@@ -911,3 +911,33 @@ ol.choice {
color: #7a6e59;
text-align: center;
}
/* Options panel: API settings box */
.api-settings-container {
    background-color: rgba(50, 50, 60, 0.3);
    border: 1px solid rgba(200, 200, 200, 0.2);
    border-radius: 5px;
    padding: 10px;
    margin-top: 10px;
}

/* Text and password fields inside the API settings box */
.api-settings-container input[type="password"],
.api-settings-container input[type="text"] {
    font-family: monospace;
    color: #eeeeee;
    background-color: rgba(30, 30, 35, 0.8);
    border: 1px solid #555555;
    border-radius: 4px;
    padding: 8px;
    width: 100%;
}

/* Dimmed placeholder text for the fields above */
.api-settings-container input[type="password"]::placeholder,
.api-settings-container input[type="text"]::placeholder {
    color: #888888;
}

/* Provider-specific rows stay hidden until the matching provider is selected */
.openai-setting,
.elevenlabs-setting {
    display: none;
}
+536
View File
@@ -0,0 +1,536 @@
/**
* API TTS Handler Base Class
* Base class for API-based TTS handlers
*/
import { TTSHandler } from './tts-handler.js';
import { moduleRegistry } from './module-registry.js';
/**
 * Base class for TTS handlers that obtain audio from a remote API.
 *
 * Subclasses are expected to override getDefaultApiBaseUrl(), loadVoices(),
 * selectVoiceForLocale(), selectDefaultVoice(), generateSpeechAudio() and
 * getVoices(); the versions here are inert placeholders.
 *
 * NOTE(review): changeState(), addEventListener() and dispatchEvent() are
 * not defined in this class; they are presumably inherited from TTSHandler.
 */
export class ApiTTSHandlerBase extends TTSHandler {
    /**
     * @param {string} id - Handler ID; also used as the prefix of the
     *   preference keys (`<id>_api_key`, `<id>_api_url`, `<id>_voice`)
     * @param {string} name - Human-readable name used in log and progress messages
     */
    constructor(id, name) {
        super();
        this.id = id;
        this.name = name;

        // Base voice options
        this.voiceOptions = {
            speed: 1.0
        };

        // State
        this.available = false;
        this.isReady = false;
        this.currentAudio = null;

        // Common API settings
        this.apiKey = '';
        this.apiBaseUrl = '';

        // Dependencies
        this.dependencies = ['localization', 'persistence-manager'];

        // The handlers below are handed to addEventListener() as bare
        // function references and read `this.id`, so they must be bound.
        this.handleApiKeyChanged = this.handleApiKeyChanged.bind(this);
        this.handleApiUrlChanged = this.handleApiUrlChanged.bind(this);
    }

    /**
     * Initialize the API TTS handler
     * @param {Function} progressCallback - Callback for progress updates,
     *   called as (percent, message)
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize(progressCallback = null) {
        const report = (percent, message) => {
            if (progressCallback) {
                progressCallback(percent, message);
            }
        };

        try {
            report(10, `Initializing ${this.name}`);
            this.changeState('LOADING');

            // Check for required dependencies
            const localization = this.getModule('localization');
            const persistenceManager = this.getModule('persistence-manager');
            if (!localization) {
                console.error(`${this.name}: Required dependency 'localization' not found`);
                this.changeState('ERROR');
                return false;
            }
            if (!persistenceManager) {
                console.error(`${this.name}: Required dependency 'persistence-manager' not found`);
                this.changeState('ERROR');
                return false;
            }
            report(20, `${this.name} dependencies loaded`);

            // API key from preferences; empty when none was stored
            this.apiKey = persistenceManager.getPreference('tts', `${this.id}_api_key`) || '';
            report(30, `${this.name} API key loaded`);

            // API base URL: stored preference first, provider default otherwise
            const defaultApiUrl = this.getDefaultApiBaseUrl();
            console.log(`${this.name}: Default API URL: ${defaultApiUrl}`);
            const savedApiUrl = persistenceManager.getPreference('tts', `${this.id}_api_url`);
            this.apiBaseUrl = savedApiUrl || defaultApiUrl;
            if (!savedApiUrl && defaultApiUrl) {
                // Nothing stored yet: persist the default
                console.log(`${this.name}: Saving default API URL to preferences: ${defaultApiUrl}`);
                persistenceManager.updatePreference('tts', `${this.id}_api_url`, defaultApiUrl);
            }
            report(40, `${this.name} API URL set to: ${this.apiBaseUrl}`);

            // React to API key / URL changes (handlers are bound in the constructor)
            this.addEventListener('tts:api:keyChanged', this.handleApiKeyChanged);
            this.addEventListener('tts:api:urlChanged', this.handleApiUrlChanged);
            report(50, `${this.name} event listeners registered`);

            // Load available voices
            await this.loadVoices();
            report(70, `${this.name} voices loaded`);

            // Set up voice based on preferences
            await this.setupVoiceFromPreferences();
            report(90, `${this.name} voice preferences loaded`);

            // The handler is marked available even without an API key
            // (handleApiKeyChanged does the same so that it stays selectable);
            // preloadSpeech() is what checks for the key.
            this.available = true;
            this.isReady = true;
            report(100, `${this.name} initialized successfully`);
            this.changeState('FINISHED');
            return true;
        } catch (error) {
            console.error(`${this.name}: Initialization error:`, error);
            report(100, `${this.name} initialization failed - ${error.message}`);
            this.changeState('ERROR');
            return false;
        }
    }

    /**
     * Get a module from the registry
     * @param {string} moduleId - ID of the module to get
     * @returns {Object|null} - The module or null if not found
     */
    getModule(moduleId) {
        return moduleRegistry.getModule(moduleId);
    }

    /**
     * Get the default API base URL for this provider
     * @returns {string} - Default API base URL (empty in the base class)
     */
    getDefaultApiBaseUrl() {
        // Should be implemented by subclasses
        return '';
    }

    /**
     * Set up voice based on preferences and locale
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async setupVoiceFromPreferences() {
        const persistenceManager = this.getModule('persistence-manager');
        const localization = this.getModule('localization');
        if (!persistenceManager || !localization) {
            return false;
        }

        // Get current locale
        const locale = localization.getLocale();

        // A voice stored for this specific provider wins
        const voiceId = persistenceManager.getPreference('tts', `${this.id}_voice`);
        if (voiceId) {
            this.voiceOptions.voice = voiceId;
            return true;
        }

        // Otherwise try to select a voice for the current locale
        return this.selectVoiceForLocale(locale);
    }

    /**
     * Load available voices from API
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async loadVoices() {
        // Should be implemented by subclasses
        return false;
    }

    /**
     * Select a voice for the given locale
     * @param {string} locale - Locale code
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        // Should be implemented by subclasses
        return this.selectDefaultVoice();
    }

    /**
     * Select a default voice
     * @returns {boolean} - Success status
     */
    selectDefaultVoice() {
        // Should be implemented by subclasses
        return false;
    }

    /**
     * Preload speech for a text
     * @param {string} text - Text to preload
     * @returns {Promise<Object|null>} - Generated audio data, or null when
     *   preloading was skipped or failed
     */
    async preloadSpeech(text) {
        // Don't try to preload if handler isn't ready, available, or if no text or API key
        if (!this.isReady || !this.available || !text || !this.apiKey) {
            if (!this.apiKey) {
                console.log(`${this.name}: Skipping preload speech - no API key set`);
            }
            return null;
        }

        try {
            const processedText = this.preprocessText(text);
            if (!processedText) {
                // Whitespace-only input: nothing to synthesize
                return null;
            }

            const audioData = await this.generateSpeechAudio(processedText);
            if (!audioData) {
                console.error(`${this.name}: Failed to generate audio data for preloading`);
                return null;
            }

            // Store in centralized TTSFactory cache, keyed by the unprocessed text
            const ttsFactory = this.getModule('tts-factory');
            if (ttsFactory) {
                ttsFactory.cacheSpeech(text, audioData);
            }
            return audioData;
        } catch (error) {
            console.error(`${this.name}: Preload speech error:`, error);
            return null;
        }
    }

    /**
     * Generate speech audio data
     * @param {string} text - Text to generate speech for
     * @returns {Promise<Object>} - Audio data (Blob)
     */
    async generateSpeechAudio(text) {
        // Should be implemented by subclasses
        return null;
    }

    /**
     * Speak text using preloaded audio
     * @param {Object} preloadData - Preloaded audio data; passed to
     *   URL.createObjectURL(), so it must be a Blob
     * @param {Function} callback - Callback for when speech completes; receives
     *   `{ success, reason?, error? }` and is invoked at most once
     * @returns {boolean} - True when playback was started
     */
    speakPreloaded(preloadData, callback = null) {
        if (!this.isReady || !this.available || !preloadData) {
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
            }
            return false;
        }

        let audioUrl = null;
        let audio = null;
        // Guards against reporting the same playback twice, e.g. when both the
        // 'error' event and the play() rejection fire for one failure.
        let settled = false;

        const release = () => {
            settled = true;
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl);
            }
            // Only clear the reference if a newer clip has not replaced it
            if (audio && this.currentAudio === audio) {
                this.currentAudio = null;
            }
        };

        const failPlayback = (error) => {
            if (settled) {
                return;
            }
            console.error(`${this.name}: Playback error:`, error);
            release();
            // The 'error' event object carries no message; the element's
            // MediaError (if any) does.
            const mediaError = audio && audio.error;
            this.dispatchEvent('tts:speak:error', {
                error: (mediaError && mediaError.message) || (error && error.message) || 'Unknown error'
            });
            if (callback) {
                callback({ success: false, reason: 'playback_error', error });
            }
        };

        try {
            // Stop any current audio
            this.stop();

            audioUrl = URL.createObjectURL(preloadData);
            audio = new Audio(audioUrl);

            audio.addEventListener('ended', () => {
                if (settled) {
                    return;
                }
                release();
                this.dispatchEvent('tts:speak:complete', {});
                if (callback) {
                    callback({ success: true });
                }
            }, { once: true });
            audio.addEventListener('error', failPlayback, { once: true });

            // Store reference to current audio
            this.currentAudio = audio;

            // play() returns a promise in current browsers; it rejects when
            // playback is blocked (e.g. autoplay policy) or the source cannot
            // be played.
            const playPromise = audio.play();
            if (playPromise && typeof playPromise.catch === 'function') {
                playPromise.catch((error) => {
                    // AbortError means pause() interrupted the start (stop());
                    // that is not a playback failure.
                    if (error && error.name === 'AbortError') {
                        return;
                    }
                    failPlayback(error);
                });
            }
            return true;
        } catch (error) {
            console.error(`${this.name}: Error playing preloaded audio:`, error);
            release();
            this.dispatchEvent('tts:speak:error', {
                error: error.message || 'Unknown error'
            });
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'playback_error', error }), 0);
            }
            return false;
        }
    }

    /**
     * Speak text
     * @param {string} text - Text to speak
     * @param {Function} callback - Callback for when speech completes; receives
     *   `{ success, reason?, error? }`
     * @returns {Promise<boolean>} - Resolves with true when playback was started
     */
    async speak(text, callback = null) {
        const reject = (reason, error) => {
            if (callback) {
                const result = error === undefined ?
                    { success: false, reason } :
                    { success: false, reason, error };
                setTimeout(() => callback(result), 0);
            }
            return false;
        };

        if (!this.isReady || !this.available || !text) {
            return reject('not_available');
        }

        try {
            const processedText = this.preprocessText(text);
            if (!processedText) {
                // Whitespace-only input is treated like missing text
                return reject('not_available');
            }

            // Use cached audio when available (cache is keyed by the unprocessed text)
            const ttsFactory = this.getModule('tts-factory');
            if (ttsFactory && ttsFactory.isSpeechCached(text)) {
                return this.speakPreloaded(ttsFactory.getCachedSpeech(text), callback);
            }

            // Generate audio data
            const audioData = await this.generateSpeechAudio(processedText);
            if (!audioData) {
                return reject('generation_failed');
            }

            // Store in centralized TTSFactory cache
            if (ttsFactory) {
                ttsFactory.cacheSpeech(text, audioData);
            }

            // Play the audio
            return this.speakPreloaded(audioData, callback);
        } catch (error) {
            console.error(`${this.name}: Error generating speech:`, error);
            this.dispatchEvent('tts:speak:error', {
                text,
                error: error.message || 'Unknown error'
            });
            return reject('generation_error', error);
        }
    }

    /**
     * Preprocess text for TTS: trims, collapses whitespace runs to single
     * spaces and appends a period when the text does not end in `.`, `!` or `?`.
     * @param {string} text - Text to preprocess
     * @returns {string} - Processed text; empty string for empty or
     *   whitespace-only input
     */
    preprocessText(text) {
        if (!text) return '';
        const processed = text.trim().replace(/\s+/g, ' ');
        if (!processed) {
            // Do not turn whitespace-only input into a lone "."
            return '';
        }
        return /[.!?]$/.test(processed) ? processed : `${processed}.`;
    }

    /**
     * Stop speaking: pauses the current audio element, if any, and drops the
     * reference to it. The completion callback of that playback is not invoked.
     */
    stop() {
        if (this.currentAudio) {
            try {
                this.currentAudio.pause();
                this.currentAudio = null;
            } catch (error) {
                console.error(`${this.name}: Error stopping speech:`, error);
            }
        }
    }

    /**
     * Check if TTS is available
     * @returns {boolean} - True if TTS is available
     */
    isAvailable() {
        return this.available;
    }

    /**
     * Get handler ID
     * @returns {string} - Handler ID
     */
    getId() {
        return this.id;
    }

    /**
     * Get available voices
     * @returns {Promise<Array>} - Resolves with array of voice objects
     */
    async getVoices() {
        // Should be implemented by subclasses
        return [];
    }

    /**
     * Set voice options
     * @param {Object} options - Voice options
     * @param {string} [options.voice] - Voice ID; also saved as the
     *   `<id>_voice` preference
     * @param {number} [options.speed] - Playback speed, clamped to 0.5-2.0;
     *   non-finite values are ignored
     */
    setVoiceOptions(options = {}) {
        const opts = options || {};

        if (opts.voice) {
            this.voiceOptions.voice = opts.voice;
            // Save the voice preference
            const persistenceManager = this.getModule('persistence-manager');
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_voice`, opts.voice);
            }
        }

        // Number.isFinite rejects NaN/Infinity, which `typeof === 'number'`
        // would let through (clamping NaN yields NaN).
        if (Number.isFinite(opts.speed)) {
            this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, opts.speed));
        }
        // Additional provider-specific options should be handled by subclasses
    }

    /**
     * Handle API key change event
     * @param {Event} event - Event whose `detail` carries `provider` and `key`;
     *   ignored unless `detail.provider` equals this handler's ID
     */
    handleApiKeyChanged(event) {
        if (!(event && event.detail && event.detail.provider === this.id)) {
            return;
        }

        const newKey = event.detail.key || '';
        this.apiKey = newKey;

        // Only the state reflects whether a key is present; availability is
        // left untouched so the handler stays selectable for configuration.
        const isFullyFunctional = !!this.apiKey;
        if (isFullyFunctional) {
            this.changeState('FINISHED');
        } else {
            // Not WAITING - we want it to stay in dropdown
            this.changeState('CONFIGURING');
        }

        console.log(`${this.name}: API key ${newKey ? 'set' : 'cleared'}. Fully functional: ${isFullyFunctional}`);

        // Always stay available in the UI dropdown
        this.available = true;
    }

    /**
     * Handle API URL change event
     * @param {Event} event - Event whose `detail` carries `provider` and `url`;
     *   ignored unless `detail.provider` equals this handler's ID. An empty
     *   URL falls back to getDefaultApiBaseUrl().
     */
    handleApiUrlChanged(event) {
        if (!(event && event.detail && event.detail.provider === this.id)) {
            return;
        }

        const newUrl = event.detail.url || this.getDefaultApiBaseUrl();
        this.apiBaseUrl = newUrl;

        // Save to preferences
        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager) {
            persistenceManager.updatePreference('tts', `${this.id}_api_url`, newUrl);
        }

        console.log(`${this.name}: API URL updated to ${newUrl}`);

        // Always stay available in the UI dropdown
        this.available = true;
    }
}
-707
View File
@@ -1,707 +0,0 @@
/**
* API TTS Handler
* Provides TTS via external APIs (e.g., ElevenLabs)
*/
import { TTSHandler } from './tts-handler.js';
import { moduleRegistry } from './module-registry.js';
/**
 * TTS handler that requests speech audio from the HTTP endpoint stored in
 * `apiEndpoint` (`POST <endpoint>` for audio, `GET <endpoint>/voices` for the
 * voice list).
 *
 * NOTE(review): bindMethods() and dispatchEvent() are not defined in this
 * class; they are presumably inherited from TTSHandler.
 */
export class ApiTTSHandler extends TTSHandler {
    constructor() {
        super();
        this.id = 'api';
        this.name = 'API TTS Handler';

        // Voice options
        this.voiceOptions = {
            voice: 'pNInz6obpgDQGcFmaJgB', // Default German voice ID for ElevenLabs
            model: 'eleven_multilingual_v2', // Use the multilingual model for better German
            speed: 1.0
        };

        // State
        this.available = false;
        this.isReady = false;
        this.currentAudio = null;
        this.preloadCache = new Map();
        // Initialised here so voice selection can read `.length` even when
        // loadVoices() failed or has not run yet.
        this.voices = [];

        // API endpoint
        this.apiEndpoint = '/api/tts';

        // Dependencies
        this.dependencies = ['localization', 'persistence-manager'];

        // Bind methods
        this.bindMethods([
            'initialize',
            'speak',
            'speakPreloaded',
            'preloadSpeech',
            'stop',
            'isAvailable',
            'getId',
            'getVoices',
            'setVoiceOptions',
            'getModule',
            'setupVoiceFromPreferences',
            'selectVoiceForLocale',
            'selectDefaultVoice'
        ]);
    }

    /**
     * Get a module from the registry
     * @param {string} moduleId - ID of the module to get
     * @returns {Object|null} - The module or null if not found
     */
    getModule(moduleId) {
        return moduleRegistry.getModule(moduleId);
    }

    /**
     * Initialize the API TTS handler
     * @param {Function} progressCallback - Callback for progress updates,
     *   called as (percent, message)
     * @returns {Promise<boolean>} - Resolves with false only when a required
     *   module is missing; resolves with true otherwise, even when the API
     *   turned out to be unavailable (see `available`)
     */
    async initialize(progressCallback = null) {
        const report = (percent, message) => {
            if (progressCallback) {
                progressCallback(percent, message);
            }
        };

        try {
            report(10, "Initializing API TTS Handler");

            // Check for required dependencies
            const localization = this.getModule('localization');
            const persistenceManager = this.getModule('persistence-manager');
            if (!localization) {
                console.error("API TTS: Localization module not found, required dependency missing");
                report(100, "API TTS initialization failed - missing localization");
                return false;
            }
            if (!persistenceManager) {
                console.error("API TTS: Persistence Manager module not found, required dependency missing");
                report(100, "API TTS initialization failed - missing persistence manager");
                return false;
            }

            // Create audio element
            this.audioElement = new Audio();
            report(30, "Loading voices");

            // Load available voices; a failure here does not abort initialization
            try {
                await this.loadVoices();
            } catch (error) {
                console.warn("API TTS: Failed to load voices, continuing with initialization", error);
            }
            report(50, "Setting up voice preferences");

            // Set up voice based on preferences and locale; failures are tolerated
            try {
                const voiceSetupSuccess = await this.setupVoiceFromPreferences();
                if (!voiceSetupSuccess) {
                    console.warn("API TTS: Could not set up voice from preferences, using default");
                }
            } catch (error) {
                console.warn("API TTS: Error setting up voice preferences", error);
            }

            // Check if API is available by making a test request
            try {
                report(70, "Checking API availability");
                const response = await fetch(`${this.apiEndpoint}/voices`, {
                    method: 'GET',
                    headers: {
                        'Content-Type': 'application/json'
                    }
                });

                if (!response.ok) {
                    console.warn(`API TTS: API endpoint not available (${response.status} ${response.statusText}). Will use fallback.`);
                    this.available = false;
                    this.isReady = true; // Still mark as ready, just not available
                    report(100, "API TTS unavailable, using fallback");
                    // The module initialized; only the API is unavailable
                    return true;
                }

                const data = await response.json();
                report(90, "API TTS available");

                // Check for German voices and set default if available.
                // NOTE(review): this replaces whatever voice
                // setupVoiceFromPreferences() selected just above - confirm
                // that overriding the stored preference is intended.
                if (data && data.voices && Array.isArray(data.voices)) {
                    const germanVoices = data.voices.filter((voice) => {
                        if (!voice) {
                            return false;
                        }
                        // A voice without a name must not abort the availability check
                        const voiceName = typeof voice.name === 'string' ? voice.name.toLowerCase() : '';
                        return voiceName.includes('german') ||
                            voice.language === 'de' ||
                            voice.language === 'de-DE';
                    });
                    if (germanVoices.length > 0) {
                        // Use the first German voice as default
                        this.voiceOptions.voice = germanVoices[0].id;
                        console.log(`API TTS: Found German voice: ${germanVoices[0].name} (${germanVoices[0].id})`);
                    }
                }

                this.available = true;
                this.isReady = true;
                report(100, "API TTS Handler ready");
                return true;
            } catch (error) {
                console.warn("API TTS: Error checking API availability:", error);
                // Mark as ready but not available
                this.available = false;
                this.isReady = true;
                report(100, "API TTS unavailable due to error");
                return true;
            }
        } catch (error) {
            console.error("Error initializing API TTS Handler:", error);
            // Mark as ready but not available
            this.available = false;
            this.isReady = true;
            report(100, "API TTS initialization failed");
            return true;
        }
    }

    /**
     * Set up voice based on preferences and locale
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async setupVoiceFromPreferences() {
        try {
            const localization = this.getModule('localization');
            const persistenceManager = this.getModule('persistence-manager');
            if (!localization || !persistenceManager) {
                console.error("API TTS: Required modules not available for voice setup");
                return this.selectDefaultVoice();
            }

            // Get current locale and preferred voice
            const currentLocale = localization.getLocale();
            const preferredVoice = persistenceManager.getPreference('tts', 'voice', '');

            // If we have a preferred voice, use it
            if (preferredVoice) {
                this.voiceOptions.voice = preferredVoice;
                console.log(`API TTS: Using preferred voice: ${preferredVoice}`);
                return true;
            }

            // Otherwise select based on locale
            console.log(`API TTS: No preferred voice, selecting for locale: ${currentLocale}`);
            return this.selectVoiceForLocale(currentLocale);
        } catch (error) {
            console.error("API TTS: Error setting up voice from preferences:", error);
            return this.selectDefaultVoice();
        }
    }

    /**
     * Load available voices from API into `this.voices`
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async loadVoices() {
        try {
            const response = await fetch(`${this.apiEndpoint}/voices`);
            if (!response.ok) {
                console.warn(`API TTS: Failed to load voices - ${response.status} ${response.statusText}`);
                return false;
            }

            const data = await response.json();
            if (!data.voices || !Array.isArray(data.voices)) {
                console.warn("API TTS: Invalid voice data received");
                return false;
            }

            this.voices = data.voices;
            console.log(`API TTS: Loaded ${this.voices.length} voices`);
            return true;
        } catch (error) {
            console.error("Error loading API TTS voices:", error);
            return false;
        }
    }

    /**
     * Lower-cased language tag of a voice object. Reads `lang` first and falls
     * back to `language`, the field the rest of this class reads.
     * @param {Object} voice - Voice object
     * @returns {string} - Language tag, or '' when the voice has none
     */
    _voiceLanguage(voice) {
        const tag = voice && (voice.lang || voice.language);
        return typeof tag === 'string' ? tag.toLowerCase() : '';
    }

    /**
     * Select a voice for the given locale
     * @param {string} locale - Locale code
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        const voices = Array.isArray(this.voices) ? this.voices : [];
        if (!locale || voices.length === 0) {
            return this.selectDefaultVoice();
        }

        // Exact locale match first, then a match on the language part
        const normalizedLocale = locale.toLowerCase();
        let matchingVoice = voices.find((voice) => {
            const tag = this._voiceLanguage(voice);
            return tag !== '' && tag === normalizedLocale;
        });
        if (!matchingVoice) {
            const langPart = normalizedLocale.split('-')[0];
            matchingVoice = voices.find((voice) => {
                const tag = this._voiceLanguage(voice);
                return tag !== '' && tag.startsWith(langPart);
            });
        }

        // If still no match, use default
        if (!matchingVoice) {
            return this.selectDefaultVoice();
        }

        // Set the matching voice
        this.voiceOptions.voice = matchingVoice.id;
        console.log(`API TTS: Selected voice ${matchingVoice.name} for locale ${locale}`);

        // Update preference
        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager) {
            persistenceManager.updatePreference('tts', 'voice', matchingVoice.id || matchingVoice.name);
        }
        return true;
    }

    /**
     * Select a default voice: the first English voice, otherwise the first
     * voice in the list. The choice is saved as the `voice` preference.
     * @returns {boolean} - Success status
     */
    selectDefaultVoice() {
        const voices = Array.isArray(this.voices) ? this.voices : [];
        if (voices.length === 0) {
            console.warn("API TTS: No voices available for default selection");
            return false;
        }

        // Prefer English voices if available
        const englishVoice = voices.find((voice) => this._voiceLanguage(voice).startsWith('en'));
        if (englishVoice) {
            this.voiceOptions.voice = englishVoice.id;
            console.log(`API TTS: Selected default English voice ${englishVoice.name}`);
        } else {
            // Otherwise use the first available voice
            this.voiceOptions.voice = voices[0].id;
            console.log(`API TTS: Selected first available voice ${voices[0].name}`);
        }

        // Update preference
        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager) {
            persistenceManager.updatePreference('tts', 'voice', this.voiceOptions.voice);
        }
        return true;
    }

    /**
     * POST the text and the current voice options to the API endpoint.
     * @param {string} processedText - Text as returned by preprocessText()
     * @returns {Promise<Blob>} - Audio returned by the API
     * @throws {Error} When the response status is not OK
     */
    async _requestSpeechBlob(processedText) {
        const response = await fetch(this.apiEndpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                text: processedText,
                voice_id: this.voiceOptions.voice,
                model_id: this.voiceOptions.model,
                speed: this.voiceOptions.speed
            })
        });
        if (!response.ok) {
            throw new Error(`API error: ${response.status} ${response.statusText}`);
        }
        return response.blob();
    }

    /**
     * Wire up the ended/error handling for an audio element, make it the
     * current audio and start playing it. Does not dispatch 'tts:speak:start'.
     * @param {Object} preloadData - `{ audio, url, text }`
     * @param {Function|null} callback - Completion callback, invoked at most once
     */
    _startPlayback(preloadData, callback) {
        const { audio, url, text } = preloadData;
        // Guards against reporting one playback twice (e.g. 'error' event
        // plus play() rejection for the same failure).
        let settled = false;

        const finish = (eventName, detail, result) => {
            if (settled) {
                return;
            }
            settled = true;
            // Only clear the reference if a newer clip has not replaced it
            if (this.currentAudio === audio) {
                this.currentAudio = null;
            }
            // Clean up URL object
            URL.revokeObjectURL(url);
            this.dispatchEvent(eventName, detail);
            if (callback) {
                callback(result);
            }
        };

        const fail = (error) => finish(
            'tts:speak:error',
            {
                text,
                // The 'error' event has no message; the element's MediaError may
                error: (audio.error && audio.error.message) || (error && error.message) || 'Unknown error'
            },
            { success: false, reason: 'playback_error', error }
        );

        audio.addEventListener('ended', () => {
            finish('tts:speak:end', { text }, { success: true });
        }, { once: true });
        audio.addEventListener('error', fail, { once: true });

        // Store reference to current audio
        this.currentAudio = audio;

        // play() returns a promise in current browsers; it rejects when
        // playback is blocked or the source cannot be played.
        const playPromise = audio.play();
        if (playPromise && typeof playPromise.catch === 'function') {
            playPromise.catch((error) => {
                // AbortError means pause() interrupted the start (stop())
                if (error && error.name === 'AbortError') {
                    return;
                }
                fail(error);
            });
        }
    }

    /**
     * Preload speech for a text
     * @param {string} text - Text to preload
     * @returns {Promise<Object|null>} - `{ audio, url, text }` (also stored in
     *   the preload cache under the unprocessed text), or null on failure
     */
    async preloadSpeech(text) {
        if (!this.available || !text) {
            return null;
        }

        try {
            const processedText = this.preprocessText(text);
            if (!processedText) {
                // Whitespace-only input: nothing to synthesize
                return null;
            }
            console.log(`API TTS: Preloading speech for: "${processedText.substring(0, 50)}${processedText.length > 50 ? '...' : ''}"`);

            const audioBlob = await this._requestSpeechBlob(processedText);

            // Create audio element but don't play it
            const audioUrl = URL.createObjectURL(audioBlob);
            const audio = new Audio(audioUrl);
            const preloadData = {
                audio,
                url: audioUrl,
                text: processedText
            };

            // Release the object URL of an entry this one replaces
            const previous = this.preloadCache.get(text);
            if (previous && previous.url) {
                URL.revokeObjectURL(previous.url);
            }
            this.preloadCache.set(text, preloadData);
            return preloadData;
        } catch (error) {
            console.warn("API TTS: Error preloading speech:", error);
            return null;
        }
    }

    /**
     * Speak text using preloaded audio
     * @param {Object} preloadData - Preloaded audio data (`{ audio, url, text }`)
     * @param {Function} callback - Callback for when speech completes; receives
     *   `{ success, reason?, error? }`
     * @returns {boolean} - True when playback was started
     */
    speakPreloaded(preloadData, callback = null) {
        if (!this.available || !preloadData || !preloadData.audio) {
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'no_preloaded_data' }), 0);
            }
            return false;
        }

        try {
            // Stop any current speech
            this.stop();

            this.dispatchEvent('tts:speak:start', { text: preloadData.text });
            this._startPlayback(preloadData, callback);
            return true;
        } catch (error) {
            console.error("API TTS: Error playing preloaded speech:", error);
            this.dispatchEvent('tts:speak:error', {
                text: preloadData.text,
                error: error.message || 'Unknown error'
            });
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'playback_error', error }), 0);
            }
            return false;
        }
    }

    /**
     * Speak text
     * @param {string} text - Text to speak
     * @param {Function} callback - Callback for when speech completes; receives
     *   `{ success, reason?, error? }`
     * @returns {Promise<boolean>} - Resolves with true when playback was started
     */
    async speak(text, callback = null) {
        const unavailable = () => {
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
            }
            return false;
        };

        if (!this.available || !text) {
            return unavailable();
        }

        try {
            // Stop any current speech
            this.stop();

            // Use (and consume) a preloaded entry when there is one
            if (this.preloadCache.has(text)) {
                const preloadData = this.preloadCache.get(text);
                this.preloadCache.delete(text);
                return this.speakPreloaded(preloadData, callback);
            }

            const processedText = this.preprocessText(text);
            if (!processedText) {
                // Whitespace-only input is treated like missing text
                return unavailable();
            }

            // The start event is dispatched before the request is sent
            this.dispatchEvent('tts:speak:start', { text: processedText });

            const audioBlob = await this._requestSpeechBlob(processedText);
            const audioUrl = URL.createObjectURL(audioBlob);
            const audio = new Audio(audioUrl);
            this._startPlayback({ audio, url: audioUrl, text: processedText }, callback);
            return true;
        } catch (error) {
            console.error("API TTS: Error generating speech:", error);
            this.dispatchEvent('tts:speak:error', {
                text,
                error: error.message || 'Unknown error'
            });
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'generation_error', error }), 0);
            }
            return false;
        }
    }

    /**
     * Preprocess text for TTS: trims, collapses whitespace runs to single
     * spaces and appends a period when the text does not end in `.`, `!` or `?`.
     * @param {string} text - Text to preprocess
     * @returns {string} - Processed text; empty string for empty or
     *   whitespace-only input
     */
    preprocessText(text) {
        if (!text) return '';
        const processed = text.trim().replace(/\s+/g, ' ');
        if (!processed) {
            // Do not turn whitespace-only input into a lone "."
            return '';
        }
        return /[.!?]$/.test(processed) ? processed : `${processed}.`;
    }

    /**
     * Stop speaking: pauses the current audio element, if any, and drops the
     * reference to it. The completion callback of that playback is not invoked.
     */
    stop() {
        if (this.currentAudio) {
            try {
                this.currentAudio.pause();
                this.currentAudio = null;
            } catch (error) {
                console.error("API TTS: Error stopping speech:", error);
            }
        }
    }

    /**
     * Check if TTS is available
     * @returns {boolean} - True if TTS is available
     */
    isAvailable() {
        return this.available;
    }

    /**
     * Get handler ID
     * @returns {string} - Handler ID
     */
    getId() {
        return this.id;
    }

    /**
     * Get available voices
     * @returns {Promise<Array>} - Resolves with array of `{ id, name, language }`
     *   objects; empty when the API is unavailable or the request fails
     */
    async getVoices() {
        if (!this.available) {
            return [];
        }

        try {
            const response = await fetch(`${this.apiEndpoint}/voices`, {
                method: 'GET',
                headers: {
                    'Content-Type': 'application/json'
                }
            });
            if (!response.ok) {
                throw new Error(`API error: ${response.status} ${response.statusText}`);
            }

            const data = await response.json();
            if (data && data.voices && Array.isArray(data.voices)) {
                return data.voices.map((voice) => ({
                    id: voice.id,
                    name: voice.name,
                    language: voice.language || 'unknown'
                }));
            }
            return [];
        } catch (error) {
            console.error("API TTS: Error getting voices:", error);
            return [];
        }
    }

    /**
     * Set voice options
     * @param {Object} options - Voice options
     * @param {string} [options.voice] - Voice ID
     * @param {string} [options.model] - Model ID
     * @param {number} [options.speed] - Speed, clamped to 0.5-2.0; non-finite
     *   values are ignored
     */
    setVoiceOptions(options = {}) {
        const opts = options || {};
        if (opts.voice) {
            this.voiceOptions.voice = opts.voice;
        }
        if (opts.model) {
            this.voiceOptions.model = opts.model;
        }
        // Number.isFinite rejects NaN/Infinity, which `typeof === 'number'`
        // would let through (clamping NaN yields NaN).
        if (Number.isFinite(opts.speed)) {
            this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, opts.speed));
        }
    }
}
+327 -181
View File
@@ -23,7 +23,6 @@ export class BrowserTTSHandler extends TTSHandler {
this.available = false;
this.voices = [];
this.currentUtterance = null;
this.preloadCache = new Map();
// Add dependencies
this.dependencies = ['localization', 'persistence-manager'];
@@ -61,114 +60,110 @@ export class BrowserTTSHandler extends TTSHandler {
async initialize(progressCallback = null) {
try {
if (progressCallback) {
progressCallback(10, "Initializing Browser TTS Handler");
progressCallback(10, 'Initializing Browser TTS');
}
// Check if the browser supports speech synthesis
this.changeState('LOADING');
// Check for browser support
if (!window.speechSynthesis) {
console.error("Browser TTS: Speech synthesis not supported by browser");
console.warn('Browser TTS: Speech synthesis not available in this browser');
if (progressCallback) {
progressCallback(100, "Browser TTS unavailable");
progressCallback(100, 'Browser TTS not available');
}
this.changeState('ERROR');
return false;
}
if (progressCallback) {
progressCallback(30, "Loading voices");
progressCallback(30, 'Browser TTS supported');
}
try {
// Load available voices
await this.loadVoices();
if (progressCallback) {
progressCallback(70, "Setting up voice");
}
// Get localization module
// Check for required dependencies
const localization = this.getModule('localization');
const persistenceManager = this.getModule('persistence-manager');
// Get current locale and preferred voice
let currentLocale = 'en-us';
let preferredVoice = '';
if (localization) {
currentLocale = localization.getLocale();
} else {
console.error("Browser TTS: Localization module not found");
}
if (persistenceManager) {
preferredVoice = persistenceManager.getPreference('tts', 'voice', '');
} else {
console.error("Browser TTS: Persistence Manager module not found");
}
// Set voice based on locale and preferences
await this.selectVoiceForLocale(currentLocale, preferredVoice);
// Check if we have a voice set
if (this.voiceOptions.voice) {
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, "Browser TTS Handler ready");
}
return true;
} else {
// Try one more time with a delay
console.log("Browser TTS: No voice set, trying again after delay");
if (progressCallback) {
progressCallback(80, "Retrying voice loading");
}
// Wait a bit and try again
return new Promise(resolve => {
setTimeout(async () => {
await this.loadVoices();
await this.selectVoiceForLocale(currentLocale, preferredVoice);
if (this.voiceOptions.voice) {
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, "Browser TTS Handler ready");
}
resolve(true);
} else {
console.error("Browser TTS: Failed to set voice after retry");
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
}
resolve(false);
}
}, 1000);
});
}
} catch (error) {
console.error("Browser TTS: Error loading voices:", error);
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
}
if (!localization) {
console.error('Browser TTS: Required dependency \'localization\' not found');
this.changeState('ERROR');
return false;
}
} catch (error) {
console.error("Browser TTS: Initialization error:", error);
if (progressCallback) {
progressCallback(100, "Browser TTS initialization failed");
if (!persistenceManager) {
console.error('Browser TTS: Required dependency \'persistence-manager\' not found');
this.changeState('ERROR');
return false;
}
if (progressCallback) {
progressCallback(40, 'Browser TTS dependencies loaded');
}
// Load voices - but don't fail initialization if no voices are found yet
// The browser may provide voices later
try {
await this.loadVoices();
console.log(`Browser TTS: Loaded ${this.voices.length} voices initially`);
} catch (error) {
console.warn('Browser TTS: Error loading voices initially:', error);
// Don't fail initialization - voices may become available later
this.voices = [];
}
if (progressCallback) {
progressCallback(60, `Browser TTS loaded ${this.voices.length} voices`);
}
// Set speech options from preferences
try {
const rate = persistenceManager.getPreference('tts', 'speed', 1.0);
const pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
const volume = persistenceManager.getPreference('tts', 'volume', 1.0);
this.options.rate = parseFloat(rate);
this.options.pitch = parseFloat(pitch);
this.options.volume = parseFloat(volume);
// Log all available voices for debugging
console.log('Browser TTS: Available voices:', this.voices.map(v => `${v.name} (${v.lang})`));
// Set voice based on locale
const locale = localization.getLocale();
console.log(`Browser TTS: Setting voice for locale: ${locale}`);
const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice');
await this.selectVoiceForLocale(locale, preferredVoice);
if (progressCallback) {
progressCallback(80, 'Browser TTS voice selected');
}
} catch (error) {
console.warn('Browser TTS: Error setting speech options:', error);
// Don't fail initialization due to voice selection issues
}
// If voices were loaded but no voice is selected, try to set a default
if (this.voices.length > 0 && !this.voiceOptions.voice) {
console.warn('Browser TTS: No voice selected after initialization, trying fallback');
this.voiceOptions.voice = this.voices[0];
}
// Always mark as available if speech synthesis is supported, regardless of voice selection
// This ensures the Browser TTS option always appears in the dropdown
this.available = true;
this.isReady = true;
if (progressCallback) {
progressCallback(100, 'Browser TTS initialized');
}
this.changeState('FINISHED');
return true;
} catch (error) {
console.error('Browser TTS: Initialization error:', error);
if (progressCallback) {
progressCallback(100, `Browser TTS initialization failed - ${error.message}`);
}
this.changeState('ERROR');
return false;
}
}
@@ -180,14 +175,8 @@ export class BrowserTTSHandler extends TTSHandler {
await this.loadVoices();
const localization = this.getModule('localization');
const persistenceManager = this.getModule('persistence-manager');
let currentLocale = 'en-us';
let preferredVoice = '';
if (localization) {
currentLocale = localization.getLocale();
}
if (persistenceManager) {
preferredVoice = persistenceManager.getPreference('tts', 'voice', '');
}
let currentLocale = localization ? localization.getLocale() : 'en-us';
let preferredVoice = persistenceManager ? persistenceManager.getPreference('tts', 'voice', '') : '';
await this.selectVoiceForLocale(currentLocale, preferredVoice);
}
@@ -197,19 +186,45 @@ export class BrowserTTSHandler extends TTSHandler {
*/
async loadVoices() {
return new Promise(resolve => {
// Get available voices
const getVoices = () => {
// Helper function to filter and sort voices
const processVoices = () => {
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
// Log all available voices for debugging
console.log('Browser TTS: Raw loaded voices:',
this.voices.map(v => `${v.name} (${v.lang})`));
// Ensure we have at least one voice
if (this.voices.length === 0) {
console.warn('Browser TTS: No voices available from speech synthesis');
resolve();
return;
}
// Sort voices to prioritize English voices first
this.voices.sort((a, b) => {
// Put English voices first
const aIsEnglish = a.lang.toLowerCase().startsWith('en');
const bIsEnglish = b.lang.toLowerCase().startsWith('en');
if (aIsEnglish && !bIsEnglish) return -1;
if (!aIsEnglish && bIsEnglish) return 1;
// Then sort by language
return a.lang.localeCompare(b.lang);
});
console.log('Browser TTS: Sorted voices:',
this.voices.map(v => `${v.name} (${v.lang})`));
resolve();
};
// Some browsers need a timeout to get voices
const timeoutId = setTimeout(() => {
if (this.voices.length === 0) {
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices after timeout`);
resolve();
console.log('Browser TTS: Using timeout fallback to get voices');
processVoices();
}
}, 1000);
@@ -218,15 +233,14 @@ export class BrowserTTSHandler extends TTSHandler {
if (this.voices.length > 0) {
clearTimeout(timeoutId);
console.log(`Browser TTS: Loaded ${this.voices.length} voices immediately`);
resolve();
processVoices();
} else {
// If no voices are available yet, set up the onvoiceschanged event
speechSynthesis.onvoiceschanged = () => {
clearTimeout(timeoutId);
this.voices = speechSynthesis.getVoices() || [];
console.log(`Browser TTS: Loaded ${this.voices.length} voices from event`);
console.log('Browser TTS: Voices changed event fired');
processVoices();
speechSynthesis.onvoiceschanged = null;
resolve();
};
}
});
@@ -239,8 +253,15 @@ export class BrowserTTSHandler extends TTSHandler {
* @returns {Promise<void>}
*/
async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
// Debug voice selection process
console.log(`Browser TTS: Selecting voice for locale ${locale}, preferred voice: ${preferredVoice || 'none'}`);
console.log(`Browser TTS: Available voices:`, this.voices.map(v => `${v.name} (${v.lang})`));
// Normalize locale for comparison
const normalizedLocale = locale.toLowerCase().split('-')[0];
const normalizedLocale = locale.toLowerCase();
const languageCode = normalizedLocale.split('-')[0]; // e.g., 'en' from 'en-us'
console.log(`Browser TTS: Normalized locale: ${normalizedLocale}, language code: ${languageCode}`);
// If we have a preferred voice, try to use it first
if (preferredVoice) {
@@ -256,35 +277,57 @@ export class BrowserTTSHandler extends TTSHandler {
}
}
// Find voices matching the locale
const localeVoices = this.voices.filter(voice => {
// Find voices exactly matching the locale (e.g., 'en-us')
const exactLocaleVoices = this.voices.filter(voice => {
const voiceLocale = voice.lang.toLowerCase();
return voiceLocale.startsWith(normalizedLocale) ||
voice.name.toLowerCase().includes(normalizedLocale);
return voiceLocale === normalizedLocale;
});
if (localeVoices.length > 0) {
console.log(`Browser TTS: Found ${exactLocaleVoices.length} exact locale matches for ${normalizedLocale}`);
if (exactLocaleVoices.length > 0) {
// Use the first matching voice
this.voiceOptions.voice = localeVoices[0];
console.log(`Browser TTS: Using ${normalizedLocale} voice: ${this.voiceOptions.voice.name}`);
this.voiceOptions.voice = exactLocaleVoices[0];
console.log(`Browser TTS: Using exact locale match for ${normalizedLocale}: ${this.voiceOptions.voice.name}`);
return;
}
// If no matching voice found, try to find any voice
if (this.voices.length > 0) {
// Look for a preferred language voice (English)
// Find voices matching the language code (e.g., 'en')
const languageVoices = this.voices.filter(voice => {
const voiceLocale = voice.lang.toLowerCase();
console.log(`Browser TTS: Comparing voice lang ${voiceLocale} with language code ${languageCode}`);
return voiceLocale.startsWith(languageCode) ||
(voiceLocale.length === 2 && languageCode.startsWith(voiceLocale));
});
console.log(`Browser TTS: Found ${languageVoices.length} language matches for ${languageCode}`);
if (languageVoices.length > 0) {
// Use the first matching voice
this.voiceOptions.voice = languageVoices[0];
console.log(`Browser TTS: Using language match for ${languageCode}: ${this.voiceOptions.voice.name}`);
return;
}
// If current language is not English and no matching voice found, try to find English voices
if (languageCode !== 'en') {
const englishVoices = this.voices.filter(voice =>
voice.lang.toLowerCase().startsWith('en')
);
console.log(`Browser TTS: Found ${englishVoices.length} English voices as fallback`);
if (englishVoices.length > 0) {
this.voiceOptions.voice = englishVoices[0];
console.log(`Browser TTS: No ${normalizedLocale} voice found, using English voice: ${this.voiceOptions.voice.name}`);
} else {
// Use the first available voice
this.voiceOptions.voice = this.voices[0];
console.log(`Browser TTS: No ${normalizedLocale} or English voice found, using: ${this.voiceOptions.voice.name}`);
console.log(`Browser TTS: No ${languageCode} voice found, using English voice: ${this.voiceOptions.voice.name}`);
return;
}
}
// As a last resort, use any available voice
if (this.voices.length > 0) {
this.voiceOptions.voice = this.voices[0];
console.log(`Browser TTS: No matching voice found, using first available voice: ${this.voiceOptions.voice.name}`);
} else {
console.log("Browser TTS: No voices available");
}
@@ -306,8 +349,43 @@ export class BrowserTTSHandler extends TTSHandler {
console.log(`Browser TTS: Preloading speech for: "${processedText.substring(0, 50)}${processedText.length > 50 ? '...' : ''}"`);
// Create utterance but don't speak it yet
const utterance = new SpeechSynthesisUtterance(processedText);
// Use MediaRecorder to capture audio output to WAV
const audioData = await this.synthesizeToWav(processedText);
if (!audioData) {
console.warn("Browser TTS: Failed to generate WAV audio");
return null;
}
// Create audio element from blob
const audio = new Audio(URL.createObjectURL(audioData.blob));
// Store preloaded data in the centralized TTSFactory cache
const preloadData = {
audio: audio,
blob: audioData.blob,
text: processedText
};
// Use the TTSFactory's cache instead of a local cache
// this.preloadCache.set(text, preloadData);
// Instead, return the preloaded data to be stored in the TTSFactory's cache
return preloadData;
} catch (error) {
console.warn("Browser TTS: Error preloading speech:", error);
return null;
}
}
/**
* Convert speech synthesis to WAV format
* @param {string} text - Text to synthesize
* @returns {Promise<Object>} - Object with WAV blob
*/
synthesizeToWav(text) {
return new Promise((resolve, reject) => {
try {
// Create utterance
const utterance = new SpeechSynthesisUtterance(text);
// Set voice and options
utterance.voice = this.voiceOptions.voice;
@@ -316,18 +394,64 @@ export class BrowserTTSHandler extends TTSHandler {
utterance.volume = this.voiceOptions.volume;
utterance.lang = this.voiceOptions.voice.lang;
// Store preloaded data
const preloadData = {
utterance,
text: processedText
// Use Web Audio API to capture the speech output
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
const destination = audioContext.createMediaStreamDestination();
const mediaRecorder = new MediaRecorder(destination.stream);
const audioChunks = [];
// Capture the audio chunks
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0) {
audioChunks.push(event.data);
}
};
this.preloadCache.set(text, preloadData);
return preloadData;
} catch (error) {
console.warn("Browser TTS: Error preloading speech:", error);
return null;
// When recording completes
mediaRecorder.onstop = () => {
// Create a WAV blob from the audio chunks
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
resolve({ blob: audioBlob });
};
// Set up speech synthesis events
utterance.onstart = () => {
console.log("Browser TTS: Started synthesizing audio to WAV");
mediaRecorder.start();
};
utterance.onend = () => {
console.log("Browser TTS: Finished synthesizing audio to WAV");
mediaRecorder.stop();
};
utterance.onerror = (error) => {
console.error("Browser TTS: Error synthesizing audio:", error);
reject(error);
};
// Start the speech synthesis
speechSynthesis.speak(utterance);
// If synthesis doesn't start within a reasonable timeout, reject the promise
const timeout = setTimeout(() => {
if (mediaRecorder.state === 'inactive') {
console.warn("Browser TTS: Synthesis to WAV timed out");
reject(new Error("Synthesis timed out"));
}
}, 5000);
// Clear timeout when synthesis starts
utterance.onstart = () => {
clearTimeout(timeout);
console.log("Browser TTS: Started synthesizing audio to WAV");
mediaRecorder.start();
};
} catch (error) {
console.error("Browser TTS: Error setting up WAV synthesis:", error);
reject(error);
}
});
}
/**
@@ -337,7 +461,7 @@ export class BrowserTTSHandler extends TTSHandler {
* @returns {boolean} - Success status
*/
speakPreloaded(preloadData, callback = null) {
if (!this.available || !preloadData || !preloadData.utterance) {
if (!this.available || !preloadData || !preloadData.audio) {
if (callback) {
setTimeout(() => callback({ success: false, reason: 'no_preloaded_data' }), 0);
}
@@ -348,13 +472,13 @@ export class BrowserTTSHandler extends TTSHandler {
// Stop any current speech
this.stop();
const { utterance, text } = preloadData;
const { audio, text } = preloadData;
// Dispatch start event
this.dispatchEvent('tts:speak:start', { text });
// Set up event listeners
utterance.onend = () => {
audio.onended = () => {
this.currentUtterance = null;
// Dispatch end event
@@ -365,7 +489,7 @@ export class BrowserTTSHandler extends TTSHandler {
}
};
utterance.onerror = (error) => {
audio.onerror = (error) => {
this.currentUtterance = null;
// Dispatch error event
@@ -375,15 +499,15 @@ export class BrowserTTSHandler extends TTSHandler {
});
if (callback) {
callback({ success: false, reason: 'synthesis_error', error });
callback({ success: false, reason: 'audio_error', error });
}
};
// Store reference to current utterance
this.currentUtterance = utterance;
this.currentUtterance = audio;
// Speak the utterance
speechSynthesis.speak(utterance);
// Play the audio
audio.play();
return true;
} catch (error) {
@@ -396,7 +520,7 @@ export class BrowserTTSHandler extends TTSHandler {
});
if (callback) {
setTimeout(() => callback({ success: false, reason: 'synthesis_error', error }), 0);
setTimeout(() => callback({ success: false, reason: 'audio_error', error }), 0);
}
return false;
@@ -409,8 +533,8 @@ export class BrowserTTSHandler extends TTSHandler {
* @param {Function} callback - Callback for when speech completes
* @returns {boolean} - Success status
*/
speak(text, callback = null) {
if (!this.available || !this.voiceOptions.voice) {
async speak(text, callback = null) {
if (!this.available || !text) {
if (callback) {
setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
}
@@ -418,34 +542,27 @@ export class BrowserTTSHandler extends TTSHandler {
}
try {
// Stop any current speech
this.stop();
// Check if we have this in the preload cache
if (this.preloadCache.has(text)) {
const preloadData = this.preloadCache.get(text);
this.preloadCache.delete(text); // Remove from cache
return this.speakPreloaded(preloadData, callback);
}
// Process text for TTS
const processedText = this.preprocessText(text);
// Create utterance
const utterance = new SpeechSynthesisUtterance(processedText);
// Use MediaRecorder to capture audio output to WAV
const audioData = await this.synthesizeToWav(processedText);
if (!audioData) {
console.warn("Browser TTS: Failed to generate WAV audio");
if (callback) {
setTimeout(() => callback({ success: false, reason: 'synthesis_error' }), 0);
}
return false;
}
// Set voice and options
utterance.voice = this.voiceOptions.voice;
utterance.rate = this.voiceOptions.rate;
utterance.pitch = this.voiceOptions.pitch;
utterance.volume = this.voiceOptions.volume;
utterance.lang = this.voiceOptions.voice.lang;
// Create audio element from blob
const audio = new Audio(URL.createObjectURL(audioData.blob));
// Dispatch start event
this.dispatchEvent('tts:speak:start', { text: processedText });
// Set up event listeners
utterance.onend = () => {
audio.onended = () => {
this.currentUtterance = null;
// Dispatch end event
@@ -456,29 +573,29 @@ export class BrowserTTSHandler extends TTSHandler {
}
};
utterance.onerror = (error) => {
audio.onerror = (error) => {
this.currentUtterance = null;
// Dispatch error event
this.dispatchEvent('tts:speak:error', {
text: processedText,
error: error.error || 'Unknown error'
error: error.message || 'Unknown error'
});
if (callback) {
callback({ success: false, reason: 'synthesis_error', error });
callback({ success: false, reason: 'audio_error', error });
}
};
// Store reference to current utterance
this.currentUtterance = utterance;
// Store the current utterance for stopping later
this.currentUtterance = audio;
// Speak the utterance
speechSynthesis.speak(utterance);
// Play the audio
audio.play();
return true;
} catch (error) {
console.error("Browser TTS: Error generating speech:", error);
console.error("Browser TTS: Error speaking:", error);
// Dispatch error event
this.dispatchEvent('tts:speak:error', {
@@ -520,8 +637,12 @@ export class BrowserTTSHandler extends TTSHandler {
* Stop speaking
*/
stop() {
if (speechSynthesis) {
speechSynthesis.cancel();
if (this.currentUtterance) {
if (this.currentUtterance.stop) {
this.currentUtterance.stop();
} else if (this.currentUtterance.pause) {
this.currentUtterance.pause();
}
this.currentUtterance = null;
}
}
@@ -549,7 +670,7 @@ export class BrowserTTSHandler extends TTSHandler {
getVoices() {
// Get localization module for current locale
const localization = this.getModule('localization');
let currentLocale = localization ? localization.getLocale().toLowerCase() : 'en-us';
let currentLocale = localization ? localization.getLocale() : 'en-us';
// Create language code variations for matching
const languageCode = currentLocale.split('-')[0]; // e.g., 'en' from 'en-us'
@@ -563,14 +684,39 @@ export class BrowserTTSHandler extends TTSHandler {
(currentLocale.startsWith(voiceLang) && voiceLang.length === 2);
});
// If no matching voices found, fall back to all voices
const voicesToUse = filteredVoices.length > 0 ? filteredVoices : this.voices;
return voicesToUse.map(voice => ({
// If matching voices found, use them
if (filteredVoices.length > 0) {
return filteredVoices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
gender: this.inferVoiceGender(voice.name)
}));
}
// If no matching voices found and current locale isn't English,
// try to fallback to English voices
if (languageCode !== 'en') {
const englishVoices = this.voices.filter(voice => {
const voiceLang = voice.lang.toLowerCase();
return voiceLang.startsWith('en');
});
if (englishVoices.length > 0) {
return englishVoices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
gender: this.inferVoiceGender(voice.name)
}));
}
}
// As a last resort, return all voices
return this.voices.map(voice => ({
id: voice.voiceURI,
name: voice.name,
lang: voice.lang,
// Add proper gender field if available, otherwise infer from name
gender: this.inferVoiceGender(voice.name)
}));
}
+330
View File
@@ -0,0 +1,330 @@
/**
* ElevenLabs TTS Handler
* Provides TTS via ElevenLabs API
*/
import { ApiTTSHandlerBase } from './api-tts-handler-base.js';
import { moduleRegistry } from './module-registry.js';
export class ElevenLabsTTSHandler extends ApiTTSHandlerBase {
    constructor() {
        super('elevenlabs', 'ElevenLabs TTS');

        // Voice options specific to ElevenLabs
        this.voiceOptions = {
            voice: 'pNInz6obpgDQGcFmaJgB', // Default voice ID for ElevenLabs
            model: 'eleven_multilingual_v2', // Use the multilingual model
            speed: 1.0
        };

        // Bind methods
        this.bindMethods([
            'initialize',
            'speak',
            'speakPreloaded',
            'preloadSpeech',
            'stop',
            'isAvailable',
            'getId',
            'getVoices',
            'setVoiceOptions',
            'getModule',
            'setupVoiceFromPreferences',
            'loadVoices',
            'selectVoiceForLocale',
            'selectDefaultVoice',
            'generateSpeechAudio',
            'getDefaultApiBaseUrl'
        ]);
    }

    /**
     * Initialize the ElevenLabs TTS handler.
     *
     * Runs the parent initialization, installs a fallback voice list, loads the
     * saved model/voice preferences, optionally refreshes the voice list from the
     * API, and finally picks a voice. A saved voice preference always wins over
     * locale-based selection.
     *
     * @param {Function} progressCallback - Callback for progress updates (percent, message)
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize(progressCallback = null) {
        try {
            if (progressCallback) {
                progressCallback(10, 'Initializing ElevenLabs TTS');
            }

            // Call parent initialize method
            const initSuccess = await super.initialize(progressCallback);
            if (!initSuccess) {
                return false;
            }

            if (progressCallback) {
                progressCallback(40, 'ElevenLabs TTS dependencies loaded');
            }

            // Set default voices in case API call fails.
            // NOTE(review): labels follow ElevenLabs' premade voice catalogue for
            // these IDs - verify against GET /voices if the catalogue changes.
            this.voices = [
                { id: 'pNInz6obpgDQGcFmaJgB', name: 'Adam', language: 'en' },
                { id: '21m00Tcm4TlvDq8ikWAM', name: 'Rachel', language: 'en' },
                { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Domi', language: 'en' },
                { id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
                { id: 'ErXwobaYiN019PkySvjV', name: 'Antoni', language: 'en' }
            ];

            // Load voice preferences
            let hasSavedVoice = false;
            const persistenceManager = this.getModule('persistence-manager');
            if (persistenceManager) {
                // Load model preference
                const model = persistenceManager.getPreference('tts', 'elevenlabs_model', 'eleven_multilingual_v2');
                if (model) {
                    this.voiceOptions.model = model;
                }

                // Load voice preference
                const voice = persistenceManager.getPreference('tts', 'elevenlabs_voice');
                if (voice) {
                    this.voiceOptions.voice = voice;
                    hasSavedVoice = true;
                }
            }

            if (progressCallback) {
                progressCallback(60, 'ElevenLabs TTS preferences loaded');
            }

            // Only attempt to load voices from API if we have an API key
            if (this.apiKey) {
                // loadVoices() swallows request errors, so detect success by checking
                // whether it actually replaced the fallback list.
                const fallbackVoices = this.voices;
                try {
                    await this.loadVoices();
                    if (this.voices !== fallbackVoices) {
                        console.log(`ElevenLabs TTS: Loaded ${this.voices.length} voices from API`);
                    } else {
                        console.warn('ElevenLabs TTS: Could not load voices from API, using defaults');
                    }
                } catch (error) {
                    console.warn('ElevenLabs TTS: Could not load voices from API, using defaults');
                    // Don't fail initialization, we already have default voices
                }
            } else {
                console.log('ElevenLabs TTS: No API key provided, using default voices');
                // Mark as available but not fully functional
                this.available = true;
            }

            if (progressCallback) {
                progressCallback(80, `ElevenLabs TTS loaded ${this.voices.length} voices`);
            }

            // Pick a voice. The user's saved voice must not be overwritten by the
            // locale heuristic, so only fall back to it when nothing was saved.
            if (hasSavedVoice) {
                console.log(`ElevenLabs TTS: Using saved voice preference: ${this.voiceOptions.voice}`);
            } else {
                const localization = this.getModule('localization');
                if (localization) {
                    const locale = localization.getLocale();
                    console.log(`ElevenLabs TTS: Setting voice for locale: ${locale}`);
                    this.selectVoiceForLocale(locale);
                } else {
                    this.selectDefaultVoice();
                }
            }

            // Mark as ready even if we're using default voices
            this.isReady = true;

            if (progressCallback) {
                progressCallback(100, 'ElevenLabs TTS initialized');
            }

            return true;
        } catch (error) {
            console.error('ElevenLabs TTS: Initialization error:', error);
            if (progressCallback) {
                progressCallback(100, `ElevenLabs TTS initialization failed - ${error.message}`);
            }
            return false;
        }
    }

    /**
     * Get the default API base URL for ElevenLabs
     * @returns {string} - Default API base URL
     */
    getDefaultApiBaseUrl() {
        return 'https://api.elevenlabs.io/v1';
    }

    /**
     * Load available voices from the ElevenLabs API into `this.voices`.
     *
     * Request failures are logged and reported as `true` on purpose so the handler
     * stays selectable/configurable; `this.voices` is only replaced on success.
     *
     * @returns {Promise<boolean>} - `false` only when the response has no `voices` array
     */
    async loadVoices() {
        if (!this.apiKey) {
            console.log('ElevenLabs TTS: No API key provided, skipping voice loading');
            // Return true to indicate initialization was successful, even without voices
            // This allows the handler to appear in the dropdown for configuration
            return true;
        }

        try {
            const response = await fetch(`${this.apiBaseUrl}/voices`, {
                method: 'GET',
                headers: {
                    'Content-Type': 'application/json',
                    'xi-api-key': this.apiKey
                }
            });

            if (!response.ok) {
                throw new Error(`API error: ${response.status} ${response.statusText}`);
            }

            const data = await response.json();
            if (data && data.voices && Array.isArray(data.voices)) {
                this.voices = data.voices.map(voice => ({
                    id: voice.voice_id,
                    name: voice.name,
                    language: voice.labels?.language || 'unknown'
                }));
                return true;
            }

            return false;
        } catch (error) {
            console.error('ElevenLabs TTS: Error loading voices:', error);
            return true; // Still return true to allow the handler to be configured
        }
    }

    /**
     * Select a voice for the given locale.
     *
     * Picks the first voice whose `language` equals the locale's primary language
     * code; falls back to the default voice when the locale is missing, the voice
     * list is empty, or nothing matches.
     *
     * @param {string} locale - Locale code (e.g. 'en-US')
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        if (!this.voices || this.voices.length === 0) {
            return this.selectDefaultVoice();
        }

        // A missing/non-string locale cannot be matched; use the default instead of throwing
        if (typeof locale !== 'string' || locale === '') {
            return this.selectDefaultVoice();
        }

        // Extract language code from locale (e.g., 'en-US' -> 'en')
        const langCode = locale.split('-')[0].toLowerCase();

        // Find a voice that matches the language code
        const matchingVoice = this.voices.find(voice => {
            if (voice.language && voice.language !== 'unknown') {
                return voice.language.toLowerCase() === langCode;
            }
            return false;
        });

        if (matchingVoice) {
            this.voiceOptions.voice = matchingVoice.id;
            return true;
        }

        // If no match, use default
        return this.selectDefaultVoice();
    }

    /**
     * Select a default voice: the first known voice, or the hardcoded default ID
     * @returns {boolean} - Success status (always true)
     */
    selectDefaultVoice() {
        // If we have voices, use the first one
        if (this.voices && this.voices.length > 0) {
            this.voiceOptions.voice = this.voices[0].id;
            return true;
        }

        // Use hardcoded default voice ID
        this.voiceOptions.voice = 'pNInz6obpgDQGcFmaJgB';
        return true;
    }

    /**
     * Generate speech audio data using the ElevenLabs API.
     *
     * @param {string} text - Text to generate speech for
     * @returns {Promise<Blob|null>} - Audio blob, or null when text/API key is
     *   missing or the request fails
     */
    async generateSpeechAudio(text) {
        if (!text || !this.apiKey) {
            return null;
        }

        try {
            // Create request payload
            const payload = {
                text: text,
                model_id: this.voiceOptions.model || 'eleven_multilingual_v2',
                voice_settings: {
                    stability: 0.5,
                    similarity_boost: 0.75,
                    style: 0.0,
                    use_speaker_boost: true,
                    speed: this.voiceOptions.speed || 1.0
                }
            };

            // Voice IDs may come from preferences, so encode them before building the path
            const voiceId = encodeURIComponent(this.voiceOptions.voice);

            // Make API request. No output_format is requested, so the API answers
            // with its default MP3 encoding; ask for (and label) it accordingly.
            const response = await fetch(`${this.apiBaseUrl}/text-to-speech/${voiceId}?optimize_streaming_latency=0`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'xi-api-key': this.apiKey,
                    'Accept': 'audio/mpeg'
                },
                body: JSON.stringify(payload)
            });

            if (!response.ok) {
                throw new Error(`API error: ${response.status} ${response.statusText}`);
            }

            // Get audio blob from response
            const audioBlob = await response.blob();

            // Trust the server's audio Content-Type; otherwise label the data as MP3
            // rather than mislabeling it as WAV.
            if (audioBlob.type.startsWith('audio/')) {
                return audioBlob;
            }
            return new Blob([audioBlob], { type: 'audio/mpeg' });
        } catch (error) {
            console.error('ElevenLabs TTS: Error generating speech:', error);
            return null;
        }
    }

    /**
     * Get available voices
     * @returns {Promise<Array>} - Resolves with array of voice objects
     *   ({ id, name, language }); empty when the handler is not available
     */
    async getVoices() {
        if (!this.available) {
            return [];
        }

        // If voices are already loaded, return them
        if (this.voices && this.voices.length > 0) {
            return this.voices;
        }

        // Otherwise try to load voices
        try {
            await this.loadVoices();
            return this.voices || [];
        } catch (error) {
            console.error('ElevenLabs TTS: Error getting voices:', error);
            return [];
        }
    }

    /**
     * Set voice options
     * @param {Object} options - Voice options; `options.model` is additionally
     *   persisted under the 'elevenlabs_model' preference
     */
    setVoiceOptions(options = {}) {
        // Call parent method for common options
        super.setVoiceOptions(options);

        // Handle ElevenLabs-specific options
        if (options.model) {
            this.voiceOptions.model = options.model;

            // Save the model preference
            const persistenceManager = this.getModule('persistence-manager');
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', 'elevenlabs_model', options.model);
            }
        }
    }
}
// Create the module-level singleton instance of the handler.
// NOTE(review): the instance is not exported or registered anywhere in the
// visible part of this file - confirm how (or whether) it is consumed.
const ElevenLabsTTS = new ElevenLabsTTSHandler();
+3 -11
View File
@@ -29,7 +29,6 @@ export class KokoroHandler extends TTSHandler {
this.iframe = null;
this.currentAudio = null;
this.currentVoice = null;
this.preloadCache = new Map();
this.pendingGenerations = new Map();
this.generationCounter = 0;
@@ -501,18 +500,11 @@ export class KokoroHandler extends TTSHandler {
}
try {
// Check if already in cache
const cacheKey = `${this.currentVoice?.id || 'af_heart'}-${this.options.rate}-${text}`;
if (this.preloadCache.has(cacheKey)) {
return this.preloadCache.get(cacheKey);
}
// Generate speech
// No longer check the local cache as we're using TTSFactory's centralized cache
// Generate speech directly
const result = await this.generateSpeech(text);
// Store in cache
this.preloadCache.set(cacheKey, result);
// Return result for centralized caching in TTSFactory
return result;
} catch (error) {
console.error('Kokoro TTS: Error preloading speech:', error);
+235
View File
@@ -0,0 +1,235 @@
/**
* OpenAI TTS Handler
* Provides TTS via OpenAI API
*/
import { ApiTTSHandlerBase } from './api-tts-handler-base.js';
export class OpenAITTSHandler extends ApiTTSHandlerBase {
constructor() {
super('openai', 'OpenAI TTS');
// Voice options specific to OpenAI
this.voiceOptions = {
voice: 'alloy', // Default voice for OpenAI
model: 'tts-1', // Standard model
speed: 1.0,
response_format: 'mp3' // OpenAI supports mp3, opus, aac, and flac (not wav)
};
// Predefined voices
this.voices = [
{ id: 'alloy', name: 'Alloy', language: 'en' },
{ id: 'echo', name: 'Echo', language: 'en' },
{ id: 'fable', name: 'Fable', language: 'en' },
{ id: 'onyx', name: 'Onyx', language: 'en' },
{ id: 'nova', name: 'Nova', language: 'en' },
{ id: 'shimmer', name: 'Shimmer', language: 'en' }
];
// Bind methods
this.bindMethods([
'initialize',
'speak',
'speakPreloaded',
'preloadSpeech',
'stop',
'isAvailable',
'getId',
'getVoices',
'setVoiceOptions',
'getModule',
'setupVoiceFromPreferences',
'loadVoices',
'selectVoiceForLocale',
'selectDefaultVoice',
'generateSpeechAudio',
'getDefaultApiBaseUrl'
]);
}
/**
* Initialize the OpenAI TTS handler
* @param {Function} progressCallback - Callback for progress updates
* @returns {Promise<boolean>} - Resolves with success status
*/
async initialize(progressCallback = null) {
try {
// Call parent initialize method
const initSuccess = await super.initialize(progressCallback);
if (!initSuccess) {
return false;
}
// Load voice preferences
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
// Load model preference
const model = persistenceManager.getPreference('tts', 'openai_model', 'tts-1');
if (model) {
this.voiceOptions.model = model;
}
// Load format preference
const format = persistenceManager.getPreference('tts', 'openai_format', 'mp3');
if (format) {
this.voiceOptions.response_format = format;
}
}
// OpenAI TTS should be considered available if the API key is set
// This will be checked by the parent class already
return true;
} catch (error) {
console.error('OpenAI TTS: Initialization error:', error);
if (progressCallback) {
progressCallback(100, `OpenAI TTS initialization failed - ${error.message}`);
}
return false;
}
}
/**
* Get the default API base URL for OpenAI
* @returns {string} - Default API base URL
*/
getDefaultApiBaseUrl() {
return 'https://api.openai.com/v1';
}
/**
* Report the voice list as loaded; OpenAI voices are a fixed, predefined set, so no API request is made
* @returns {Promise<boolean>} - Resolves with success status
*/
async loadVoices() {
// OpenAI has a fixed set of voices, no need to fetch them
return true;
}
/**
* Select a voice for the given locale
* @param {string} locale - Locale code
* @returns {boolean} - Success status
*/
selectVoiceForLocale(locale) {
// Extract language code from locale (e.g., 'en-US' -> 'en')
const langCode = locale.split('-')[0].toLowerCase();
// All OpenAI voices are English-based, so if the locale is English, we might want to pick a specific voice
// Otherwise, just use the default voice
if (langCode === 'en') {
this.voiceOptions.voice = 'nova'; // A bit more natural-sounding for general use
return true;
}
// For non-English locales, still use a default voice (OpenAI voices can handle multiple languages)
return this.selectDefaultVoice();
}
/**
* Select a default voice
* @returns {boolean} - Success status
*/
selectDefaultVoice() {
this.voiceOptions.voice = 'alloy';
return true;
}
/**
* Generate speech audio data using OpenAI API
* @param {string} text - Text to generate speech for
* @returns {Promise<Object>} - Audio data (Blob)
*/
async generateSpeechAudio(text) {
if (!text || !this.apiKey) {
return null;
}
try {
// Create request payload
const payload = {
model: this.voiceOptions.model || 'tts-1',
input: text,
voice: this.voiceOptions.voice || 'alloy',
response_format: this.voiceOptions.response_format || 'mp3',
speed: this.voiceOptions.speed || 1.0
};
// Make API request
const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
},
body: JSON.stringify(payload)
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
}
// Get audio blob from response
const audioBlob = await response.blob();
// Note: OpenAI doesn't support WAV format directly, so we're using the format specified in voiceOptions
// The audio element should still be able to play mp3/opus/aac properly
return new Blob([audioBlob], { type: `audio/${this.voiceOptions.response_format}` });
} catch (error) {
console.error('OpenAI TTS: Error generating speech:', error);
return null;
}
}
/**
* Get available voices
* @returns {Promise<Array>} - Resolves with array of voice objects
*/
async getVoices() {
if (!this.available) {
return [];
}
// OpenAI has a fixed set of voices
return this.voices;
}
/**
* Set voice options
* @param {Object} options - Voice options
*/
setVoiceOptions(options = {}) {
// Call parent method for common options
super.setVoiceOptions(options);
// Handle OpenAI-specific options
if (options.model) {
this.voiceOptions.model = options.model;
// Save the model preference
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'openai_model', options.model);
}
}
if (options.response_format) {
// Ensure valid format: mp3, opus, aac, or flac
const validFormats = ['mp3', 'opus', 'aac', 'flac'];
if (validFormats.includes(options.response_format)) {
this.voiceOptions.response_format = options.response_format;
// Save the format preference
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'openai_format', options.response_format);
}
}
}
}
}
// Instantiate the handler once at module load; OpenAITTS is the module-level instance
const OpenAITTS = new OpenAITTSHandler();
+272 -24
View File
@@ -35,7 +35,8 @@ class OptionsUIModule extends BaseModule {
'showReloadNotice',
'toggle',
'setupEventListeners',
'saveCurrentSettings'
'saveCurrentSettings',
'setupApiUrlFields'
]);
}
@@ -85,6 +86,9 @@ class OptionsUIModule extends BaseModule {
// Apply settings
this.applySettings();
// Setup API URLs with default values if needed
this.setupApiUrlFields();
console.log('Options UI: Initialization complete');
}, 1000); // 1 second delay
});
@@ -266,6 +270,127 @@ class OptionsUIModule extends BaseModule {
ttsSection.appendChild(ttsVoiceContainer);
// API TTS Provider Settings (ElevenLabs and OpenAI)
// Container for API settings that will be shown/hidden based on selected TTS system
const apiSettingsContainer = document.createElement('div');
apiSettingsContainer.id = 'api-tts-settings';
apiSettingsContainer.className = 'api-settings-container';
apiSettingsContainer.style.display = 'none';
// ElevenLabs API Key
const elevenLabsApiKeyContainer = document.createElement('div');
elevenLabsApiKeyContainer.className = 'options-row elevenlabs-setting';
elevenLabsApiKeyContainer.dataset.provider = 'elevenlabs';
const elevenLabsApiKeyLabel = document.createElement('label');
elevenLabsApiKeyLabel.textContent = 'ElevenLabs API Key:';
elevenLabsApiKeyContainer.appendChild(elevenLabsApiKeyLabel);
const elevenLabsApiKey = document.createElement('input');
elevenLabsApiKey.type = 'password';
elevenLabsApiKey.id = 'elevenlabs-api-key';
elevenLabsApiKey.placeholder = 'Enter your ElevenLabs API key';
elevenLabsApiKey.addEventListener('change', (e) => {
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'elevenlabs_api_key', e.target.value);
// Notify TTS system that API key has changed
document.dispatchEvent(new CustomEvent('tts:api:keyChanged', {
detail: { provider: 'elevenlabs', key: e.target.value }
}));
}
});
elevenLabsApiKeyContainer.appendChild(elevenLabsApiKey);
apiSettingsContainer.appendChild(elevenLabsApiKeyContainer);
// ElevenLabs API Base URL
const elevenLabsApiUrlContainer = document.createElement('div');
elevenLabsApiUrlContainer.className = 'options-row elevenlabs-setting';
elevenLabsApiUrlContainer.dataset.provider = 'elevenlabs';
const elevenLabsApiUrlLabel = document.createElement('label');
elevenLabsApiUrlLabel.textContent = 'ElevenLabs API URL:';
elevenLabsApiUrlContainer.appendChild(elevenLabsApiUrlLabel);
const elevenLabsApiUrl = document.createElement('input');
elevenLabsApiUrl.type = 'text';
elevenLabsApiUrl.id = 'elevenlabs-api-url';
elevenLabsApiUrl.placeholder = 'https://api.elevenlabs.io/v1';
elevenLabsApiUrl.addEventListener('change', (e) => {
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'elevenlabs_api_base_url', e.target.value);
// Notify TTS system that API URL has changed
document.dispatchEvent(new CustomEvent('tts:api:urlChanged', {
detail: { provider: 'elevenlabs', url: e.target.value }
}));
}
});
elevenLabsApiUrlContainer.appendChild(elevenLabsApiUrl);
apiSettingsContainer.appendChild(elevenLabsApiUrlContainer);
// OpenAI API Key
const openaiApiKeyContainer = document.createElement('div');
openaiApiKeyContainer.className = 'options-row openai-setting';
openaiApiKeyContainer.dataset.provider = 'openai';
const openaiApiKeyLabel = document.createElement('label');
openaiApiKeyLabel.textContent = 'OpenAI API Key:';
openaiApiKeyContainer.appendChild(openaiApiKeyLabel);
const openaiApiKey = document.createElement('input');
openaiApiKey.type = 'password';
openaiApiKey.id = 'openai-api-key';
openaiApiKey.placeholder = 'Enter your OpenAI API key';
openaiApiKey.addEventListener('change', (e) => {
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'openai_api_key', e.target.value);
// Notify TTS system that API key has changed
document.dispatchEvent(new CustomEvent('tts:api:keyChanged', {
detail: { provider: 'openai', key: e.target.value }
}));
}
});
openaiApiKeyContainer.appendChild(openaiApiKey);
apiSettingsContainer.appendChild(openaiApiKeyContainer);
// OpenAI API Base URL
const openaiApiUrlContainer = document.createElement('div');
openaiApiUrlContainer.className = 'options-row openai-setting';
openaiApiUrlContainer.dataset.provider = 'openai';
const openaiApiUrlLabel = document.createElement('label');
openaiApiUrlLabel.textContent = 'OpenAI API URL:';
openaiApiUrlContainer.appendChild(openaiApiUrlLabel);
const openaiApiUrl = document.createElement('input');
openaiApiUrl.type = 'text';
openaiApiUrl.id = 'openai-api-url';
openaiApiUrl.placeholder = 'https://api.openai.com/v1';
openaiApiUrl.addEventListener('change', (e) => {
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', 'openai_api_base_url', e.target.value);
// Notify TTS system that API URL has changed
document.dispatchEvent(new CustomEvent('tts:api:urlChanged', {
detail: { provider: 'openai', url: e.target.value }
}));
}
});
openaiApiUrlContainer.appendChild(openaiApiUrl);
apiSettingsContainer.appendChild(openaiApiUrlContainer);
ttsSection.appendChild(apiSettingsContainer);
// Speed controls
const speedContainer = document.createElement('div');
speedContainer.className = 'options-row';
@@ -494,7 +619,12 @@ class OptionsUIModule extends BaseModule {
effectsVolume,
reloadNotice,
speechRate: speedSlider,
ttsSpeechToggle
ttsSpeechToggle,
apiSettingsContainer,
elevenLabsApiKey,
elevenLabsApiUrl,
openaiApiKey,
openaiApiUrl
};
}
@@ -542,41 +672,78 @@ class OptionsUIModule extends BaseModule {
// Clear existing options
this.elements.ttsSystem.innerHTML = '';
// Add "None" option first
// Add 'None' option
const noneOption = document.createElement('option');
noneOption.value = 'none';
noneOption.textContent = 'None (Disable TTS)';
noneOption.textContent = 'None';
this.elements.ttsSystem.appendChild(noneOption);
// Get available handlers
// Get available TTS handlers
const handlers = ttsFactory.getAvailableHandlers();
console.log('Options UI: Available TTS handlers:', handlers.map(h => h.id).join(', '));
// Add all registered handlers
for (const id in handlers) {
// Add options for each handler
for (const handler of handlers) {
const option = document.createElement('option');
option.value = id;
option.textContent = this.getTtsSystemName(id);
option.value = handler.id;
option.textContent = this.getTtsSystemName(handler.id);
this.elements.ttsSystem.appendChild(option);
}
// If no handlers available, add a disabled option
if (this.elements.ttsSystem.options.length === 1) {
const option = document.createElement('option');
option.value = '';
option.textContent = 'No TTS systems available';
option.disabled = true;
this.elements.ttsSystem.appendChild(option);
// Set the current active handler
const activeHandler = ttsFactory.getActiveHandler();
console.log('Options UI: Active TTS handler:', activeHandler ? (activeHandler.getId ? activeHandler.getId() : activeHandler.id) : 'none');
if (activeHandler) {
if (typeof activeHandler.getId === 'function') {
// Use getId() if available
this.elements.ttsSystem.value = activeHandler.getId();
} else if (activeHandler.id) {
// Otherwise try to use the id property
this.elements.ttsSystem.value = activeHandler.id;
} else {
// If no id is available, default to 'none'
this.elements.ttsSystem.value = 'none';
console.warn('Options UI: Active TTS handler has no ID');
}
} else {
this.elements.ttsSystem.value = 'none';
}
// Set the current provider value in the dropdown
if (this.persistenceManager) {
const provider = this.persistenceManager.getPreference('tts', 'provider');
if (provider) {
const option = Array.from(this.elements.ttsSystem.options).find(opt => opt.value === provider);
if (option) {
this.elements.ttsSystem.value = provider;
}
// Show/hide API settings based on selected TTS system
this.updateApiSettingsVisibility();
// Add change event to show/hide API settings
this.elements.ttsSystem.addEventListener('change', () => {
this.updateApiSettingsVisibility();
});
}
/**
* Update visibility of API settings based on selected TTS system
*/
updateApiSettingsVisibility() {
if (!this.elements || !this.elements.apiSettingsContainer) return;
const selectedProvider = this.elements.ttsSystem.value;
// Show/hide API settings container based on whether an API provider is selected
if (selectedProvider === 'elevenlabs' || selectedProvider === 'openai') {
this.elements.apiSettingsContainer.style.display = 'block';
// Show/hide provider-specific settings
const elevenLabsSettings = document.querySelectorAll('.elevenlabs-setting');
const openaiSettings = document.querySelectorAll('.openai-setting');
elevenLabsSettings.forEach(element => {
element.style.display = selectedProvider === 'elevenlabs' ? 'flex' : 'none';
});
openaiSettings.forEach(element => {
element.style.display = selectedProvider === 'openai' ? 'flex' : 'none';
});
} else {
this.elements.apiSettingsContainer.style.display = 'none';
}
}
@@ -761,6 +928,26 @@ class OptionsUIModule extends BaseModule {
if (this.elements.ttsSpeechToggle) {
this.elements.ttsSpeechToggle.checked = prefs.tts.enabled;
}
// ElevenLabs API Key
if (this.elements.elevenLabsApiKey) {
this.elements.elevenLabsApiKey.value = prefs.tts.elevenlabs_api_key;
}
// ElevenLabs API Base URL
if (this.elements.elevenLabsApiUrl) {
this.elements.elevenLabsApiUrl.value = prefs.tts.elevenlabs_api_base_url;
}
// OpenAI API Key
if (this.elements.openaiApiKey) {
this.elements.openaiApiKey.value = prefs.tts.openai_api_key;
}
// OpenAI API Base URL
if (this.elements.openaiApiUrl) {
this.elements.openaiApiUrl.value = prefs.tts.openai_api_base_url;
}
});
}
@@ -873,6 +1060,22 @@ class OptionsUIModule extends BaseModule {
// Save text speed setting
const textSpeed = parseInt(this.elements.textSpeed.value);
this.persistenceManager.updatePreference('animation', 'speed', textSpeed);
// Save ElevenLabs API Key
const elevenLabsApiKey = this.elements.elevenLabsApiKey.value;
this.persistenceManager.updatePreference('tts', 'elevenlabs_api_key', elevenLabsApiKey);
// Save ElevenLabs API Base URL
const elevenLabsApiUrl = this.elements.elevenLabsApiUrl.value;
this.persistenceManager.updatePreference('tts', 'elevenlabs_api_base_url', elevenLabsApiUrl);
// Save OpenAI API Key
const openaiApiKey = this.elements.openaiApiKey.value;
this.persistenceManager.updatePreference('tts', 'openai_api_key', openaiApiKey);
// Save OpenAI API Base URL
const openaiApiUrl = this.elements.openaiApiUrl.value;
this.persistenceManager.updatePreference('tts', 'openai_api_base_url', openaiApiUrl);
}
setupEventListeners() {
@@ -944,6 +1147,51 @@ class OptionsUIModule extends BaseModule {
}
});
}
setupApiUrlFields() {
if (!this.elements) return;
const persistenceManager = this.getModule('persistence-manager');
if (!persistenceManager) return;
// Set up ElevenLabs API URL
if (this.elements.elevenLabsApiUrl) {
const savedUrl = persistenceManager.getPreference('tts', 'elevenlabs_api_url');
if (!savedUrl) {
const defaultUrl = 'https://api.elevenlabs.io/v1';
console.log('Options UI: Setting default ElevenLabs API URL:', defaultUrl);
this.elements.elevenLabsApiUrl.value = defaultUrl;
persistenceManager.updatePreference('tts', 'elevenlabs_api_url', defaultUrl);
// Also dispatch the change event to notify the handler
window.dispatchEvent(new CustomEvent('tts:api:urlChanged', {
detail: {
provider: 'elevenlabs',
url: defaultUrl
}
}));
}
}
// Set up OpenAI API URL
if (this.elements.openaiApiUrl) {
const savedUrl = persistenceManager.getPreference('tts', 'openai_api_url');
if (!savedUrl) {
const defaultUrl = 'https://api.openai.com/v1';
console.log('Options UI: Setting default OpenAI API URL:', defaultUrl);
this.elements.openaiApiUrl.value = defaultUrl;
persistenceManager.updatePreference('tts', 'openai_api_url', defaultUrl);
// Also dispatch the change event to notify the handler
window.dispatchEvent(new CustomEvent('tts:api:urlChanged', {
detail: {
provider: 'openai',
url: defaultUrl
}
}));
}
}
}
}
// Create the singleton instance
+262 -30
View File
@@ -1,12 +1,13 @@
/**
* TTS Factory Module
* Creates and manages TTS handler instances
* Manages TTS handler instances
*/
import { BaseModule } from './base-module.js';
import { moduleRegistry } from './module-registry.js';
import { BrowserTTSHandler } from './browser-tts-handler.js';
import { ApiTTSHandler } from './api-tts-handler.js';
import { KokoroHandler } from './kokoro-handler.js';
import { ElevenLabsTTSHandler } from './elevenlabs-tts-handler.js';
import { OpenAITTSHandler } from './openai-tts-handler.js';
class TTSFactoryModule extends BaseModule {
/**
@@ -22,6 +23,12 @@ class TTSFactoryModule extends BaseModule {
this.ttsAvailable = false;
this.speed = 1; // Default speed
// LRU Cache for preloaded speech
this.audioCache = new Map();
this.maxCacheSize = 20; // Maximum number of cached items
this.cacheHits = 0;
this.cacheMisses = 0;
// Listen for kokoro:ready event
document.addEventListener('kokoro:ready', (event) => {
if (event.detail && typeof event.detail.success === 'boolean') {
@@ -43,6 +50,15 @@ class TTSFactoryModule extends BaseModule {
}
});
// Listen for handler availability changes
document.addEventListener('tts:handler:availabilityChanged', (event) => {
if (event && event.detail) {
const { handlerId, available } = event.detail;
console.log(`TTS Factory: Handler ${handlerId} availability changed to ${available}`);
this.updateTTSAvailability();
}
});
// Bind methods
this.bindMethods([
'registerHandler',
@@ -58,7 +74,15 @@ class TTSFactoryModule extends BaseModule {
'getVoices',
'getPreference',
'isSpeaking',
'configure'
'configure',
'preloadSpeech',
'generateSpeechHash',
'speakPreloaded',
'getCachedSpeech',
'addToCache',
'manageCacheSize',
'cacheSpeech',
'isSpeechCached'
]);
}
@@ -80,16 +104,23 @@ class TTSFactoryModule extends BaseModule {
return false;
}
// Register available handlers
// Reset any previous state
this.initStatus = {};
for (const id in this.handlers) {
this.initStatus[id] = false;
}
// Register all available handlers (this will overwrite any existing handlers)
console.log('TTS Factory: Registering all handlers');
this.registerHandler('browser', new BrowserTTSHandler());
this.registerHandler('api', new ApiTTSHandler());
this.registerHandler('elevenlabs', new ElevenLabsTTSHandler());
this.registerHandler('openai', new OpenAITTSHandler());
this.registerHandler('kokoro', new KokoroHandler());
console.log('TTS Factory: Registered handlers:', Object.keys(this.handlers));
this.reportProgress(30, "Registered TTS handlers");
// Force the initialization of all handlers for diagnostics
// This ensures they're all initialized even if not selected
// Initialize all handlers in parallel for efficiency
const initPromises = [];
for (const id of Object.keys(this.handlers)) {
console.log(`TTS Factory: Initializing handler ${id}`);
@@ -105,7 +136,13 @@ class TTSFactoryModule extends BaseModule {
// Get user preferences
const ttsEnabled = this.getPreference('tts', 'enabled', false);
const preferredProvider = this.getPreference('tts', 'provider', 'browser');
let preferredProvider = this.getPreference('tts', 'provider', '');
// Default to browser if no provider is set
if (!preferredProvider || preferredProvider === 'none') {
preferredProvider = 'browser';
persistenceManager.updatePreference('tts', 'provider', 'browser');
}
console.log(`TTS Factory: User preferences - enabled: ${ttsEnabled}, provider: ${preferredProvider}`);
@@ -128,7 +165,7 @@ class TTSFactoryModule extends BaseModule {
this.reportProgress(60, "Using Kokoro TTS as fallback");
this.setActiveHandler('kokoro');
// Update preference to Kokoro since it worked
this.getModule('persistence-manager').updatePreference('tts', 'provider', 'kokoro');
persistenceManager.updatePreference('tts', 'provider', 'kokoro');
initSuccess = true;
}
// Try Browser TTS as fallback if not already tried
@@ -136,20 +173,24 @@ class TTSFactoryModule extends BaseModule {
this.reportProgress(70, "Using Browser TTS as fallback");
this.setActiveHandler('browser');
// Update preference to Browser since it worked
this.getModule('persistence-manager').updatePreference('tts', 'provider', 'browser');
persistenceManager.updatePreference('tts', 'provider', 'browser');
initSuccess = true;
}
else {
// If all failed, disable TTS
this.reportProgress(80, "All TTS handlers failed, disabling TTS");
this.getModule('persistence-manager').updatePreference('tts', 'enabled', false);
this.getModule('persistence-manager').updatePreference('tts', 'provider', 'none');
// If all failed, set to none but don't disable TTS entirely
// This allows configuring API-based TTS later
this.reportProgress(80, "No working TTS handlers found");
persistenceManager.updatePreference('tts', 'provider', 'none');
}
}
}
// Determine overall TTS availability
this.ttsAvailable = this.initStatus.kokoro || this.initStatus.browser;
// Any handler that's initialized should count towards availability
this.ttsAvailable = Object.values(this.initStatus).some(status => status === true);
console.log('TTS Factory: Overall TTS availability:', this.ttsAvailable);
console.log('TTS Factory: Handler status:', this.initStatus);
// Dispatch TTS availability event
window.dispatchEvent(new CustomEvent('tts:availability', {
@@ -290,25 +331,33 @@ class TTSFactoryModule extends BaseModule {
}
/**
* Get all available TTS handlers
* @returns {Object} - Map of handler IDs to initialization status
* Get available TTS handlers
* @returns {Array} - Array of handler objects
*/
getAvailableHandlers() {
const available = {};
// Debug logging for diagnostic purposes
console.log('TTS Factory: getAvailableHandlers called');
console.log('TTS Factory: Current initialization status:', this.initStatus);
console.log('TTS Factory: Registered handlers:', Object.keys(this.handlers).join(', '));
const availableHandlers = [];
// Always show all initialized handlers in the options dropdown,
// regardless of availability status. This ensures API handlers are configurable
// even when the API key is not set.
for (const id in this.handlers) {
// Add the handler to the available list even if it's not initialized yet
// This ensures all registered handlers appear in the options
available[id] = true;
console.log(`TTS Factory: Including handler ${id} in options`);
// Only include handlers that have been initialized
if (this.handlers[id] && this.initStatus[id]) {
console.log(`TTS Factory: Handler ${id} is initialized, adding to available handlers list`);
availableHandlers.push({
id: id,
handler: this.handlers[id]
});
}
}
return available;
if (availableHandlers.length === 0) {
console.warn('TTS Factory: No available handlers found - something is wrong!');
} else {
console.log(`TTS Factory: Found ${availableHandlers.length} available handlers`);
}
return availableHandlers;
}
/**
@@ -471,8 +520,8 @@ class TTSFactoryModule extends BaseModule {
} else if (id === 'kokoro') {
// Kokoro uses rate from 0.5 to 1.5
scaledOptions.rate = 0.5 + (normalizedSpeed);
} else if (id === 'api') {
// API uses speed from 0.5 to 2.0
} else if (id === 'elevenlabs' || id === 'openai') {
// ElevenLabs and OpenAI use speed from 0.5 to 2.0
scaledOptions.speed = 0.5 + (normalizedSpeed * 1.5);
}
@@ -490,6 +539,189 @@ class TTSFactoryModule extends BaseModule {
return true;
}
/**
* Preload speech for a text
* @param {string} text - Text to preload
* @returns {Promise<Object>} - Resolves with preloaded speech data
*/
async preloadSpeech(text) {
if (!this.activeHandler) {
console.warn("TTS Factory: No active TTS handler for preload");
return null;
}
try {
// Generate a hash for this speech request
const hash = await this.generateSpeechHash(text);
// Check if we already have this audio in cache
const cachedData = this.getCachedSpeech(hash);
if (cachedData) {
console.log(`TTS Factory: Using cached speech for hash ${hash} (hits: ${this.cacheHits}, misses: ${this.cacheMisses})`);
// Move this item to the end of the Map to mark it as most recently used
this.audioCache.delete(hash);
this.audioCache.set(hash, cachedData);
this.cacheHits++;
return cachedData;
}
// Cache miss - need to generate new speech data
this.cacheMisses++;
// If the handler has a preloadSpeech method, use it
if (typeof this.handlers[this.activeHandler].preloadSpeech === 'function') {
const preloadData = await this.handlers[this.activeHandler].preloadSpeech(text);
// Cache the generated speech data
if (preloadData) {
this.addToCache(hash, preloadData);
console.log(`TTS Factory: Added speech to cache for hash ${hash} (size: ${this.audioCache.size}/${this.maxCacheSize})`);
}
return preloadData;
} else {
console.warn(`TTS Factory: Handler ${this.activeHandler} does not support preloading`);
return null;
}
} catch (error) {
console.error("TTS Factory: Error preloading speech:", error);
return null;
}
}
/**
 * Generate a unique hash for a speech request.
 *
 * The hashed string combines the active handler id, the voice id, the
 * factory speed and the text. The result is a SHA-256 hex digest, or a
 * 32-bit string hash (hex) if the digest cannot be computed.
 *
 * NOTE(review): this class defines a second method named
 * `generateSpeechHash` further down. When a class body declares the same
 * method name twice, the later declaration is the one that ends up on the
 * prototype, so this implementation appears to be unused at runtime.
 *
 * @param {string} text - Text to generate hash for
 * @returns {Promise<string>} - Hash string, or null when there is no active handler
 */
async generateSpeechHash(text) {
    if (!this.activeHandler) return null;
    // Get voice ID and other parameters
    const handler = this.handlers[this.activeHandler];
    const handlerId = this.activeHandler;
    // Reads `voice.id`; a voice stored as a plain string has no `id`, so it yields 'default'
    const voiceId = handler.voiceOptions?.voice?.id || 'default';
    const speed = this.speed;
    // Create a string to hash
    const dataToHash = `${handlerId}_${voiceId}_${speed}_${text}`;
    // Use SubtleCrypto to create a SHA-256 hash if available
    try {
        const encoder = new TextEncoder();
        const data = encoder.encode(dataToHash);
        const hashBuffer = await crypto.subtle.digest('SHA-256', data);
        // Convert to hex string
        const hashArray = Array.from(new Uint8Array(hashBuffer));
        const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
        return hashHex;
    } catch (error) {
        // Fallback to simple string hash if SubtleCrypto is not available
        console.warn('TTS Factory: Unable to generate crypto hash, using fallback', error);
        let hash = 0;
        for (let i = 0; i < dataToHash.length; i++) {
            const char = dataToHash.charCodeAt(i);
            // Equivalent to hash * 31 + char, kept within 32-bit integer range below
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32bit integer
        }
        return Math.abs(hash).toString(16);
    }
}
/**
* Speak using preloaded speech data
* @param {Object} preloadData - Preloaded speech data
* @param {Object} options - Speech options
* @returns {Promise<boolean>} - Success status
*/
async speakPreloaded(preloadData, options = {}) {
if (!this.activeHandler) {
console.warn("TTS Factory: No active TTS handler for speak preloaded");
return false;
}
// If the handler has a speakPreloaded method, use it
if (typeof this.handlers[this.activeHandler].speakPreloaded === 'function') {
return await this.handlers[this.activeHandler].speakPreloaded(preloadData, options);
} else {
console.warn(`TTS Factory: Handler ${this.activeHandler} does not support speaking preloaded data`);
return false;
}
}
/**
* Get cached speech data
* @param {string} hash - Hash of the speech data
* @returns {Object|null} - Cached speech data or null if not found
*/
getCachedSpeech(hash) {
if (!this.audioCache || !this.audioCache.has(hash)) return null;
return this.audioCache.get(hash);
}
/**
* Add speech data to the cache
* @param {string} hash - Hash of the speech data
* @param {Object} data - Speech data to cache
*/
addToCache(hash, data) {
if (!this.audioCache) this.audioCache = new Map();
this.audioCache.set(hash, data);
this.cacheMisses++;
// Manage cache size
this.manageCacheSize();
}
/**
* Manage cache size
*/
manageCacheSize() {
if (!this.audioCache) return;
// Check if cache size exceeds the maximum allowed
if (this.audioCache.size > this.maxCacheSize) {
// Remove the oldest item from the cache
const oldestKey = this.audioCache.keys().next().value;
this.audioCache.delete(oldestKey);
}
}
/**
* Generate a hash for a speech request
* @param {string} text - Text to generate hash for
* @returns {Promise<string>} - Hash value
*/
async generateSpeechHash(text) {
// For now, just use the text as the hash
// In a more complex implementation, you could include voice ID and other parameters
// You could also use a proper hashing function
return `${this.activeHandler}-${text}`;
}
/**
* Check if speech is cached by text
* @param {string} text - Text to check
* @returns {boolean} - True if cached
*/
async isSpeechCached(text) {
const hash = await this.generateSpeechHash(text);
return this.audioCache && this.audioCache.has(hash);
}
/**
* Cache speech data with text as key
* @param {string} text - Text used for the speech
* @param {Object} audioData - The audio data to cache
*/
async cacheSpeech(text, audioData) {
const hash = await this.generateSpeechHash(text);
this.addToCache(hash, audioData);
}
/**
* Clean up when module is disposed
*/
+24
View File
@@ -9,6 +9,9 @@ export class TTSHandler {
// Set up event dispatcher
this.eventTarget = document.createElement('div');
// Module state tracking - conform to BaseModule interface
this.state = 'PENDING';
}
/**
@@ -77,6 +80,27 @@ export class TTSHandler {
return [];
}
/**
* Get the current module state
* @returns {string} - Current state
*/
getState() {
return this.state;
}
/**
* Change the module state
* @param {string} newState - The new state
*/
changeState(newState) {
this.state = newState;
// Dispatch state change event
this.dispatchEvent('state:changed', {
state: newState
});
}
/**
* Dispatch a custom event
* @param {string} eventName - Name of the event
+31 -23
View File
@@ -169,16 +169,13 @@ class TTSPlayerModule extends BaseModule {
if (!this.isSpeaking() || (this.currentSpeech && this.currentSpeech !== text)) {
console.log(`TTS Player: Preloading speech for: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`);
// Use the preload method of the TTS factory if available
if (typeof ttsFactory.preloadSpeech === 'function') {
await ttsFactory.preloadSpeech(text);
this.preloadedAudio.set(text, true);
// Use the preload method of the TTS factory
const preloadData = await ttsFactory.preloadSpeech(text);
if (preloadData) {
this.preloadedAudio.set(text, preloadData);
console.log(`TTS Player: Successfully preloaded speech for: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`);
} else {
// Fallback: use normal speak method with a dummy callback
ttsFactory.speak(text, () => {
ttsFactory.stop(); // Stop immediately after generation
this.preloadedAudio.set(text, true);
});
console.warn(`TTS Player: Failed to preload speech for: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`);
}
}
} catch (error) {
@@ -195,21 +192,14 @@ class TTSPlayerModule extends BaseModule {
}
/**
* Speak text
* Speak a sentence
* @param {string} text - Text to speak
* @param {Function} callback - Optional callback for when speech completes
* @returns {boolean} - True if speech started successfully
* @param {Function} callback - Callback for when speech completes
* @returns {boolean} - Success status
*/
speak(text, callback = null) {
if (!text) return false;
console.log(`TTS Player: Speaking "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`, this.enabled ? "(TTS enabled)" : "(TTS disabled)");
// Store the current speech text
this.currentSpeech = text;
// Check if TTS is enabled
if (!this.enabled) {
console.log("TTS Player: TTS is disabled, not speaking");
if (callback) {
setTimeout(() => callback({ success: false, reason: 'tts_disabled' }), 0);
}
@@ -220,15 +210,32 @@ class TTSPlayerModule extends BaseModule {
const ttsFactory = this.getModule('tts-factory');
if (ttsFactory) {
this.pendingCallback = callback;
this.currentSpeech = text;
// Check if this text was preloaded
const wasPreloaded = this.preloadedAudio.has(text);
if (wasPreloaded) {
const preloadedData = this.preloadedAudio.get(text);
if (preloadedData) {
console.log("TTS Player: Using preloaded speech");
this.preloadedAudio.delete(text); // Remove from cache after use
// Use the preloaded speech data
ttsFactory.speakPreloaded(preloadedData, (result) => {
// Store the completed result
this.currentSpeech = null;
// Call the callback if provided
if (this.pendingCallback) {
this.pendingCallback(result);
this.pendingCallback = null;
}
// Start TTS with minimal delay to synchronize with text rendering
// Process next in preload queue if any
if (this.preloadQueue.length > 0 && !this.isPreloading) {
this.processPreloadQueue();
}
});
} else {
// Start TTS with regular speech if not preloaded
ttsFactory.speak(text, (result) => {
// Store the completed result
this.currentSpeech = null;
@@ -244,6 +251,7 @@ class TTSPlayerModule extends BaseModule {
this.processPreloadQueue();
}
});
}
return true;
} else {