Fix Kokoro TTS integration issues: Remove API key requirement and ensure system-specific options display correctly

2025-04-05 22:06:22 +00:00
parent e5a3016846
commit fc693ae695
11 changed files with 3296 additions and 596 deletions
@@ -0,0 +1,570 @@
+/**
+ * BrowserTTSModule for AI Interactive Fiction
+ * Implementation using the browser's Web Speech API
+ */
+import { TTSHandlerModule } from './tts-handler-module.js';
+
+/**
+ * Browser TTS Module - Uses the browser's Web Speech API for TTS
+ */
+export class BrowserTTSModule extends TTSHandlerModule {
+    constructor() {
+        super('browser', 'Browser TTS');
+        
+        // Voice options
+        this.voiceOptions = {
+            voice: null, // Will be set during initialization
+            rate: 1.0,
+            pitch: 1.0,
+            volume: 1.0
+        };
+        
+        // State
+        this.available = false;
+        this.currentUtterance = null;
+        
+        // Ensure dependencies are correctly defined from parent class
+        // this.dependencies should already contain ['persistence-manager', 'localization']
+        
+        // Bind additional methods beyond those in TTSHandlerModule
+        this.bindMethods([
+            'onVoicesChanged',
+            'loadVoices', 
+            'selectVoiceForLocale',
+            'synthesizeToWav',
+            'speakPreloaded',
+            'speak',
+            'preprocessText',
+            'inferVoiceGender'
+        ]);
+    }
+    
+    /**
+     * Initialize the browser TTS module
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async initialize() {
+        try {
+            this.reportProgress(10, 'Initializing Browser TTS');
+            
+            // Check for browser support
+            if (!window.speechSynthesis) {
+                console.error('Browser TTS: Speech synthesis not available in this browser');
+                return false;
+            }
+            
+            this.reportProgress(30, 'Browser TTS supported');
+            
+            // Initialize parent
+            const parentInit = await super.initialize();
+            if (!parentInit) {
+                console.error('Browser TTS: Parent initialization failed');
+                return false;
+            }
+            
+            // Get required dependencies
+            const persistenceManager = this.getModule('persistence-manager');
+            if (!persistenceManager) {
+                console.error('Browser TTS: Required dependency persistence-manager not found');
+                return false;
+            }
+            
+            const localization = this.getModule('localization');
+            if (!localization) {
+                console.error('Browser TTS: Required dependency localization not found');
+                return false;
+            }
+            
+            // Load voices
+            const voicesLoaded = await this.loadVoices();
+            if (!voicesLoaded) {
+                console.error('Browser TTS: Failed to load voices');
+                return false;
+            }
+
+            // Set speech options from preferences
+            this.voiceOptions.rate = persistenceManager.getPreference('tts', 'rate', 1.0);
+            this.voiceOptions.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
+            this.voiceOptions.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
+            const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');
+            
+            // Set voice based on current locale
+            const currentLocale = localization.getLocale() || 'en-us';
+            await this.selectVoiceForLocale(currentLocale, preferredVoice);
+            
+            // Listen for locale changes
+            document.addEventListener('locale:changed', async (event) => {
+                if (event.detail && event.detail.locale) {
+                    await this.selectVoiceForLocale(event.detail.locale);
+                }
+            });
+            
+            // Listen for voices changed events
+            if (window.speechSynthesis.onvoiceschanged !== undefined) {
+                window.speechSynthesis.onvoiceschanged = this.onVoicesChanged;
+            }
+            
+            this.isReady = true;
+            this.available = true;
+            this.reportProgress(100, 'Browser TTS initialized');
+            
+            return true;
+        } catch (error) {
+            console.error('Browser TTS: Initialization error:', error);
+            this.isReady = false;
+            this.available = false;
+            return false;
+        }
+    }
+    
+    /**
+     * Handle voices changed event
+     */
+    async onVoicesChanged() {
+        await this.loadVoices();
+        
+        // Re-select voice based on current locale
+        const localization = this.getModule('localization');
+        const persistenceManager = this.getModule('persistence-manager');
+        
+        if (localization && persistenceManager) {
+            const currentLocale = localization.getLocale() || 'en-us';
+            const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');
+            await this.selectVoiceForLocale(currentLocale, preferredVoice);
+        }
+    }
+    
+    /**
+     * Load available voices from the speech synthesis API
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async loadVoices() {
+        try {
+            this.reportProgress(40, 'Loading browser voices');
+            
+            // Try to get voices
+            let voices = window.speechSynthesis.getVoices();
+            
+            // If voices array is empty, wait for onvoiceschanged event
+            if (!voices || voices.length === 0) {
+                try {
+                    console.log('Browser TTS: No voices available immediately, waiting for voices to load...');
+                    
+                    // Wait for voices to be loaded (with timeout)
+                    voices = await new Promise((resolve, reject) => {
+                        // Set a timeout in case voices never load
+                        const timeout = setTimeout(() => {
+                            console.warn('Browser TTS: Timeout waiting for voices');
+                            // Resolve with empty array instead of rejecting
+                            resolve([]);
+                        }, 3000);
+                        
+                        // Listen for voices changed event
+                        window.speechSynthesis.onvoiceschanged = () => {
+                            clearTimeout(timeout);
+                            const loadedVoices = window.speechSynthesis.getVoices();
+                            console.log(`Browser TTS: Voices loaded, found ${loadedVoices.length} voices`);
+                            resolve(loadedVoices);
+                        };
+                    });
+                } catch (voiceWaitError) {
+                    console.error('Browser TTS: Error waiting for voices:', voiceWaitError);
+                    // Continue with empty voices array
+                    voices = [];
+                }
+            }
+            
+            // Store voices
+            this.voices = voices || [];
+            
+            // Log available voices for debugging
+            console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
+            if (this.voices.length > 0) {
+                console.log('Browser TTS: First few voices:', this.voices.slice(0, 3));
+            }
+            
+            // If no voices available but speech synthesis is supported, still return true
+            // Some browsers may not expose voices but still support speech synthesis
+            if (this.voices.length === 0) {
+                console.warn('Browser TTS: No voices available, but continuing with default voice');
+                // Create a default voice entry
+                this.voices = [{
+                    default: true,
+                    lang: 'en-US',
+                    localService: true,
+                    name: 'Default Voice',
+                    voiceURI: 'default'
+                }];
+            }
+            
+            this.reportProgress(60, 'Browser voices loaded');
+            return true;
+        } catch (error) {
+            console.error('Browser TTS: Error loading voices:', error);
+            return false;
+        }
+    }
+    
+    /**
+     * Set voice based on locale
+     * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
+     * @param {string} preferredVoice - Optional preferred voice name
+     * @returns {Promise<boolean>} - Success status
+     */
+    async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
+        // Normalize locale format
+        locale = locale.toLowerCase().replace('_', '-');
+        const languageCode = locale.split('-')[0];
+        
+        // First try to use the preferred voice if specified
+        if (preferredVoice) {
+            const voice = this.voices.find(v => 
+                v.name === preferredVoice || 
+                v.voiceURI === preferredVoice
+            );
+            
+            if (voice) {
+                this.voiceOptions.voice = voice;
+                return true;
+            }
+        }
+        
+        // Try to find a voice that matches the exact locale
+        const exactMatch = this.voices.find(v => 
+            v.lang.toLowerCase() === locale
+        );
+        
+        if (exactMatch) {
+            this.voiceOptions.voice = exactMatch;
+            return true;
+        }
+        
+        // Try to find a voice that matches the language code
+        const languageMatch = this.voices.find(v => 
+            v.lang.toLowerCase().startsWith(languageCode)
+        );
+        
+        if (languageMatch) {
+            this.voiceOptions.voice = languageMatch;
+            return true;
+        }
+        
+        // Fallback to the first available voice
+        if (this.voices.length > 0) {
+            this.voiceOptions.voice = this.voices[0];
+            return true;
+        }
+        
+        // No voices available
+        return false;
+    }
+    
+    /**
+     * Speak text
+     * @param {string} text - Text to speak
+     * @param {Function} callback - Callback for when speech completes
+     * @returns {boolean} - Success status
+     */
+    speak(text, callback = null) {
+        if (!this.isReady || !window.speechSynthesis) {
+            if (callback) {
+                callback({ success: false, reason: 'not_ready' });
+            }
+            return false;
+        }
+        
+        // Stop any ongoing speech
+        this.stop();
+        
+        const processedText = this.preprocessText(text);
+        
+        // Create utterance
+        const utterance = new SpeechSynthesisUtterance(processedText);
+        
+        // Set options
+        if (this.voiceOptions.voice) {
+            utterance.voice = this.voiceOptions.voice;
+        }
+        
+        utterance.rate = this.voiceOptions.rate;
+        utterance.pitch = this.voiceOptions.pitch;
+        utterance.volume = this.voiceOptions.volume;
+        
+        // Set up event handlers
+        utterance.onend = () => {
+            this.isSpeaking = false;
+            if (callback) {
+                callback({ success: true });
+            }
+        };
+        
+        utterance.onerror = (error) => {
+            this.isSpeaking = false;
+            console.error('Browser TTS: Speech error', error);
+            if (callback) {
+                callback({ success: false, reason: 'synthesis_error', error });
+            }
+        };
+        
+        // Store current utterance
+        this.currentUtterance = utterance;
+        this.isSpeaking = true;
+        
+        // Start speaking
+        window.speechSynthesis.speak(utterance);
+        
+        return true;
+    }
+    
+    /**
+     * Preload speech for a text
+     * @param {string} text - Text to preload
+     * @returns {Promise<Object>} - Preloaded speech data
+     */
+    async preloadSpeech(text) {
+        if (!this.isReady || !window.speechSynthesis) {
+            return { success: false, reason: 'not_ready' };
+        }
+        
+        // Generate WAV audio data
+        const wavResult = await this.synthesizeToWav(text);
+        
+        if (!wavResult.success) {
+            return { success: false, reason: 'synthesis_failed' };
+        }
+        
+        return {
+            success: true,
+            audioData: wavResult.audioData,
+            text,
+            duration: wavResult.duration || 0
+        };
+    }
+    
+    /**
+     * Convert speech synthesis to WAV format
+     * @param {string} text - Text to synthesize
+     * @returns {Promise<Object>} - Object with audio data
+     */
+    async synthesizeToWav(text) {
+        return new Promise((resolve) => {
+            if (!this.isReady || !window.speechSynthesis) {
+                resolve({ success: false, reason: 'not_ready' });
+                return;
+            }
+            
+            // Process text for better synthesis
+            const processedText = this.preprocessText(text);
+            
+            // Create audio context
+            const AudioContext = window.AudioContext || window.webkitAudioContext;
+            if (!AudioContext) {
+                resolve({ success: false, reason: 'no_audio_context' });
+                return;
+            }
+            
+            const audioContext = new AudioContext();
+            
+            // Create media stream destination
+            const destination = audioContext.createMediaStreamDestination();
+            
+            // Create media recorder
+            const mediaRecorder = new MediaRecorder(destination.stream);
+            const audioChunks = [];
+            
+            // Set up event handlers
+            mediaRecorder.ondataavailable = (event) => {
+                if (event.data.size > 0) {
+                    audioChunks.push(event.data);
+                }
+            };
+            
+            mediaRecorder.onstop = () => {
+                // Create blob from chunks
+                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+                
+                // Convert blob to array buffer
+                const reader = new FileReader();
+                reader.onloadend = () => {
+                    resolve({
+                        success: true,
+                        audioData: reader.result
+                    });
+                };
+                
+                reader.onerror = () => {
+                    resolve({ success: false, reason: 'blob_read_error' });
+                };
+                
+                reader.readAsArrayBuffer(audioBlob);
+            };
+            
+            // Create utterance
+            const utterance = new SpeechSynthesisUtterance(processedText);
+            
+            // Set options
+            if (this.voiceOptions.voice) {
+                utterance.voice = this.voiceOptions.voice;
+            }
+            
+            utterance.rate = this.voiceOptions.rate;
+            utterance.pitch = this.voiceOptions.pitch;
+            utterance.volume = this.voiceOptions.volume;
+            
+            // Start recording
+            mediaRecorder.start();
+            
+            // Set up completion handling
+            utterance.onend = () => {
+                mediaRecorder.stop();
+            };
+            
+            utterance.onerror = (error) => {
+                console.error('Browser TTS: Synthesis error', error);
+                mediaRecorder.stop();
+                resolve({ success: false, reason: 'synthesis_error' });
+            };
+            
+            // Start speaking
+            window.speechSynthesis.speak(utterance);
+            
+            // Set timeout in case onend never fires
+            setTimeout(() => {
+                if (mediaRecorder.state === 'recording') {
+                    mediaRecorder.stop();
+                }
+            }, 30000); // 30-second timeout
+        });
+    }
+    
+    /**
+     * Speak preloaded audio data
+     * @param {Object} preloadedData - Data from preloadSpeech
+     * @param {Function} callback - Callback for when speech completes
+     * @returns {boolean} - Success status
+     */
+    speakPreloaded(preloadedData, callback = null) {
+        if (!preloadedData || !preloadedData.text) {
+            console.error('Browser TTS: Invalid preloaded data');
+            return false;
+        }
+        
+        // For browser TTS, we don't use the preloaded data directly
+        // Instead, we just speak the text again
+        return this.speak(preloadedData.text, callback);
+    }
+    
+    /**
+     * Preprocess text for TTS
+     * @param {string} text - Text to preprocess
+     * @returns {string} - Processed text
+     */
+    preprocessText(text) {
+        // Remove HTML tags
+        text = text.replace(/<[^>]*>/g, ' ');
+        
+        // Replace special characters with their spoken equivalents
+        text = text.replace(/&/g, ' and ');
+        
+        // Normalize whitespace
+        text = text.replace(/\s+/g, ' ').trim();
+        
+        return text;
+    }
+    
+    /**
+     * Stop speaking
+     * @returns {boolean} - Success status
+     */
+    stop() {
+        if (window.speechSynthesis) {
+            window.speechSynthesis.cancel();
+            this.isSpeaking = false;
+            this.currentUtterance = null;
+            return true;
+        }
+        return false;
+    }
+    
+    /**
+     * Get available voices
+     * @returns {Array} - Array of voice objects
+     */
+    async getVoices() {
+        if (!this.isReady) {
+            return [];
+        }
+        
+        const localization = this.getModule('localization');
+        const currentLocale = localization ? localization.getLocale() : 'en-us';
+        
+        // Normalize locale format
+        const normalizedLocale = currentLocale.toLowerCase().replace('_', '-');
+        const languageCode = normalizedLocale.split('-')[0];
+        
+        // Filter voices by current locale
+        const filteredVoices = this.voices.filter(voice => {
+            const voiceLang = voice.lang.toLowerCase();
+            return voiceLang.startsWith(languageCode) || 
+                   voiceLang === normalizedLocale ||
+                   (normalizedLocale.startsWith(voiceLang) && voiceLang.length === 2);
+        });
+        
+        // If matching voices found, use them
+        if (filteredVoices.length > 0) {
+            return filteredVoices.map(voice => ({
+                id: voice.voiceURI,
+                name: voice.name,
+                lang: voice.lang,
+                gender: this.inferVoiceGender(voice.name)
+            }));
+        }
+        
+        // If no matching voices found, return all voices
+        return this.voices.map(voice => ({
+            id: voice.voiceURI,
+            name: voice.name,
+            lang: voice.lang,
+            gender: this.inferVoiceGender(voice.name)
+        }));
+    }
+    
+    /**
+     * Infer voice gender from name
+     * @param {string} name - Voice name
+     * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
+     */
+    inferVoiceGender(name) {
+        const lowerName = name.toLowerCase();
+        
+        // Common terms indicating gender
+        const maleTerms = ['male', 'man', 'guy', 'boy', 'mr', 'sir'];
+        const femaleTerms = ['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss'];
+        
+        // Check for explicit gender terms in the name
+        for (const term of maleTerms) {
+            if (lowerName.includes(term)) return 'male';
+        }
+        
+        for (const term of femaleTerms) {
+            if (lowerName.includes(term)) return 'female';
+        }
+        
+        return 'unknown';
+    }
+}
+
+// Register the module with the module registry
+// Module registry MUST be accessed via window, not direct import
+if (window.moduleRegistry) {
+    try {
+        // Create instance first, then register it
+        const browserTTSModule = new BrowserTTSModule();
+        window.moduleRegistry.register(browserTTSModule);
+        console.log('Browser TTS Module registered successfully');
+    } catch (err) {
+        console.error('Failed to register Browser TTS Module:', err);
+    }
+} else {
+    console.error('Module registry not available when attempting to register Browser TTS Module');
+}