// ai.interactive.fiction/public/js/browser-tts-handler.js

/**
 * BrowserTTSHandler for AI Interactive Fiction
 * Implementation using the browser's Web Speech API
 */
import { TTSHandler } from './tts-handler.js';
import { moduleRegistry } from './module-registry.js';

/** Locale used when the localization module is missing or reports no locale. */
const DEFAULT_LOCALE = 'en-us';

/** How long loadVoices() waits for a `voiceschanged` event before resolving anyway (ms). */
const VOICE_LOAD_TIMEOUT_MS = 1000;

/** How long synthesizeToWav() waits for the utterance to start before rejecting (ms). */
const SYNTHESIS_START_TIMEOUT_MS = 5000;

// Whole-word hints used by inferVoiceGender(). They are matched against word
// tokens, never substrings, so "female" cannot match "male" and "German"
// cannot match "man".
const MALE_TERMS = new Set(['male', 'man', 'guy', 'boy', 'mr', 'sir', 'him', 'his']);
const FEMALE_TERMS = new Set(['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss', 'her', 'hers']);
const MALE_NAMES = new Set([
    'david', 'james', 'john', 'paul', 'mark', 'thomas', 'daniel', 'jack', 'william',
    'george', 'michael', 'robert', 'peter', 'brian', 'richard', 'steve', 'bruce'
]);
const FEMALE_NAMES = new Set([
    'mary', 'sarah', 'emma', 'susan', 'julia', 'karen', 'lisa', 'anna', 'laura',
    'amy', 'elizabeth', 'jennifer', 'maria', 'emily', 'jessica', 'alice', 'victoria'
]);

/**
 * Normalize a language tag for comparison: lower-case, with `_` separators
 * (reported by some platforms, e.g. `en_US`) converted to `-`.
 * @param {string} tag - Language tag such as 'en-US' or 'en_US'
 * @returns {string} - Normalized tag ('' when the tag is missing)
 */
function normalizeLangTag(tag) {
    return String(tag || '').trim().toLowerCase().replace(/_/g, '-');
}

/**
 * Extract the primary language subtag, e.g. 'en' from 'en-US'.
 * @param {string} tag - Language tag
 * @returns {string} - Primary subtag in lower case
 */
function primaryLanguage(tag) {
    return normalizeLangTag(tag).split('-')[0];
}

/**
 * Render a voice list as "name (lang)" strings for debug logging.
 * @param {Array} voices - Voice objects
 * @returns {string[]} - Human readable descriptions
 */
function describeVoices(voices) {
    return voices.map(voice => `${voice.name} (${voice.lang})`);
}

/**
 * Sort comparator: English voices first, then by language tag.
 * @param {Object} a - First voice
 * @param {Object} b - Second voice
 * @returns {number} - Standard comparator result
 */
function compareVoicesEnglishFirst(a, b) {
    const aIsEnglish = primaryLanguage(a.lang) === 'en';
    const bIsEnglish = primaryLanguage(b.lang) === 'en';

    if (aIsEnglish !== bIsEnglish) {
        return aIsEnglish ? -1 : 1;
    }
    return normalizeLangTag(a.lang).localeCompare(normalizeLangTag(b.lang));
}

/**
 * Read the user's preferred browser voice. The handler-specific key
 * ('browser_voice') wins; the generic 'voice' key is used as a fallback so
 * initialization and voice-change handling agree on the same preference.
 * @param {Object|null} persistenceManager - Persistence module (may be null)
 * @returns {string} - Preferred voice name/URI, or '' when none is stored
 */
function readPreferredVoiceName(persistenceManager) {
    if (!persistenceManager) {
        return '';
    }
    return persistenceManager.getPreference('tts', 'browser_voice', '') ||
           persistenceManager.getPreference('tts', 'voice', '') ||
           '';
}

/**
 * Build a printable description for a failed audio playback.
 * @param {Object} audio - The audio element that failed
 * @param {Object} error - Error or event passed to the failure handler
 * @returns {*} - Best available error description
 */
function describePlaybackError(audio, error) {
    return (audio && audio.error && audio.error.message) ||
           (error && (error.message || error.error)) ||
           'Unknown error';
}

/**
 * TTS handler backed by the browser's Web Speech API.
 *
 * Voices are loaded from `speechSynthesis`, one is selected from the stored
 * preference and the active locale, and text is synthesized through
 * `synthesizeToWav()` and played back through an audio element.
 */
export class BrowserTTSHandler extends TTSHandler {
    constructor() {
        super();
        this.id = 'browser';
        this.name = 'Browser TTS Handler';

        // Voice options
        this.voiceOptions = {
            voice: null, // Will be set during initialization
            rate: 1.0,
            pitch: 1.0,
            volume: 1.0
        };

        // State
        this.available = false;
        this.voices = [];
        this.currentUtterance = null;

        // Add dependencies
        this.dependencies = ['localization', 'persistence-manager'];

        // Bind methods
        this.bindMethods([
            'initialize',
            'speak',
            'speakPreloaded',
            'preloadSpeech',
            'stop',
            'isAvailable',
            'getId',
            'getVoices',
            'setVoiceOptions',
            'onVoicesChanged',
            'getModule'
        ]);
    }

    /**
     * Get a module from the registry
     * @param {string} moduleId - ID of the module to get
     * @returns {Object|null} - The module or null if not found
     */
    getModule(moduleId) {
        return moduleRegistry.getModule(moduleId);
    }

    /**
     * Initialize the browser TTS handler: verify browser support and
     * dependencies, load voices, apply stored speech preferences and select
     * a voice for the current locale.
     * @param {Function} progressCallback - Optional callback `(percent, message)` for progress updates
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize(progressCallback = null) {
        const report = (percent, message) => {
            if (progressCallback) {
                progressCallback(percent, message);
            }
        };

        try {
            report(10, 'Initializing Browser TTS');
            this.changeState('LOADING');

            // Check for browser support
            if (!window.speechSynthesis) {
                console.warn('Browser TTS: Speech synthesis not available in this browser');
                report(100, 'Browser TTS not available');
                this.changeState('ERROR');
                return false;
            }

            report(30, 'Browser TTS supported');

            // Check for required dependencies
            const localization = this.getModule('localization');
            const persistenceManager = this.getModule('persistence-manager');

            if (!localization) {
                console.error('Browser TTS: Required dependency \'localization\' not found');
                this.changeState('ERROR');
                return false;
            }

            if (!persistenceManager) {
                console.error('Browser TTS: Required dependency \'persistence-manager\' not found');
                this.changeState('ERROR');
                return false;
            }

            report(40, 'Browser TTS dependencies loaded');

            // Load voices - but don't fail initialization if no voices are found yet.
            // The browser may provide voices later.
            try {
                await this.loadVoices();
                console.log(`Browser TTS: Loaded ${this.voices.length} voices initially`);
            } catch (error) {
                console.warn('Browser TTS: Error loading voices initially:', error);
                this.voices = [];
            }

            report(60, `Browser TTS loaded ${this.voices.length} voices`);

            // Set speech options from preferences
            try {
                // These must land in voiceOptions, which is what utterances read.
                // setVoiceOptions() clamps the values and ignores NaN.
                this.setVoiceOptions({
                    rate: parseFloat(persistenceManager.getPreference('tts', 'speed', 1.0)),
                    pitch: parseFloat(persistenceManager.getPreference('tts', 'pitch', 1.0)),
                    volume: parseFloat(persistenceManager.getPreference('tts', 'volume', 1.0))
                });

                // Keep an `options` bag in sync when one exists on the instance
                // (earlier versions of this method wrote the values there).
                if (this.options && typeof this.options === 'object') {
                    this.options.rate = this.voiceOptions.rate;
                    this.options.pitch = this.voiceOptions.pitch;
                    this.options.volume = this.voiceOptions.volume;
                }

                // Log all available voices for debugging
                console.log('Browser TTS: Available voices:', describeVoices(this.voices));

                // Set voice based on locale
                const locale = localization.getLocale();
                console.log(`Browser TTS: Setting voice for locale: ${locale}`);
                await this.selectVoiceForLocale(locale, readPreferredVoiceName(persistenceManager));

                report(80, 'Browser TTS voice selected');
            } catch (error) {
                console.warn('Browser TTS: Error setting speech options:', error);
                // Don't fail initialization due to voice selection issues
            }

            // If voices were loaded but no voice is selected, try to set a default
            if (this.voices.length > 0 && !this.voiceOptions.voice) {
                console.warn('Browser TTS: No voice selected after initialization, trying fallback');
                this.voiceOptions.voice = this.voices[0];
            }

            // Always mark as available if speech synthesis is supported, regardless of voice selection.
            // Note that isAvailable() additionally requires a selected voice.
            this.available = true;
            this.isReady = true;

            report(100, 'Browser TTS initialized');

            this.changeState('FINISHED');
            return true;
        } catch (error) {
            console.error('Browser TTS: Initialization error:', error);
            if (progressCallback) {
                progressCallback(100, `Browser TTS initialization failed - ${error.message}`);
            }
            this.changeState('ERROR');
            return false;
        }
    }

    /**
     * Handle voices changed event: reload the voice list and re-run voice
     * selection for the current locale and stored preference.
     * @returns {Promise<void>}
     */
    async onVoicesChanged() {
        await this.loadVoices();
        await this.selectVoiceFromPreferences();
    }

    /**
     * Select a voice using the current locale (from the localization module)
     * and the stored preferred voice (from the persistence manager). Missing
     * modules fall back to the default locale / no preference.
     * @returns {Promise<void>}
     */
    async selectVoiceFromPreferences() {
        const localization = this.getModule('localization');
        const persistenceManager = this.getModule('persistence-manager');
        const locale = localization ? localization.getLocale() : DEFAULT_LOCALE;
        await this.selectVoiceForLocale(locale, readPreferredVoiceName(persistenceManager));
    }

    /**
     * Load available voices into `this.voices` (English first, then by language).
     *
     * Resolves immediately when the browser already exposes voices. Otherwise
     * it waits for `voiceschanged`, or for a timeout after which it resolves
     * with whatever is available. If voices arrive only after the timeout,
     * the list is still refreshed and a voice is selected when none is set.
     * @returns {Promise<void>}
     */
    async loadVoices() {
        return new Promise(resolve => {
            // Re-read, sort and store the voice list; returns the voice count.
            const refreshVoices = () => {
                const loaded = Array.from(speechSynthesis.getVoices() || []);
                console.log('Browser TTS: Raw loaded voices:', describeVoices(loaded));

                loaded.sort(compareVoicesEnglishFirst);
                this.voices = loaded;

                if (loaded.length === 0) {
                    console.warn('Browser TTS: No voices available from speech synthesis');
                } else {
                    console.log('Browser TTS: Sorted voices:', describeVoices(loaded));
                }
                return loaded.length;
            };

            // Try to get voices immediately
            if (refreshVoices() > 0) {
                console.log(`Browser TTS: Loaded ${this.voices.length} voices immediately`);
                resolve();
                return;
            }

            let settled = false;
            let timeoutId = null;
            const settle = () => {
                if (!settled) {
                    settled = true;
                    resolve();
                }
            };

            // No voices yet: wait for the browser to announce them.
            const previousHandler = speechSynthesis.onvoiceschanged;
            const onChanged = () => {
                clearTimeout(timeoutId);
                console.log('Browser TTS: Voices changed event fired');

                // One-shot handler: put back whatever was installed before us.
                if (speechSynthesis.onvoiceschanged === onChanged) {
                    speechSynthesis.onvoiceschanged = previousHandler || null;
                }

                const arrivedAfterTimeout = settled;
                refreshVoices();
                settle();

                // The caller already moved on without voices, so nobody else
                // will run selection for this late batch.
                if (arrivedAfterTimeout && this.voices.length > 0 && !this.voiceOptions.voice) {
                    this.selectVoiceFromPreferences().catch(error => {
                        console.warn('Browser TTS: Error selecting voice after late voice load:', error);
                    });
                }
            };
            speechSynthesis.onvoiceschanged = onChanged;

            // Some browsers never fire the event; fall back to a timeout.
            // The handler stays installed so late voices are still picked up.
            timeoutId = setTimeout(() => {
                console.log('Browser TTS: Using timeout fallback to get voices');
                refreshVoices();
                settle();
            }, VOICE_LOAD_TIMEOUT_MS);
        });
    }

    /**
     * Set voice based on locale.
     *
     * Selection order: preferred voice (by name or voiceURI), exact locale
     * match, same primary language, any English voice, first available voice.
     * Language tags are compared case-insensitively with `_` treated as `-`,
     * and languages are compared by primary subtag so 'fi' does not match 'fil'.
     * When no voices are loaded the current selection is left untouched.
     * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
     * @param {string} preferredVoice - Optional preferred voice name
     * @returns {Promise<void>}
     */
    async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
        console.log(`Browser TTS: Selecting voice for locale ${locale}, preferred voice: ${preferredVoice || 'none'}`);
        console.log(`Browser TTS: Available voices:`, describeVoices(this.voices));

        // Normalize locale for comparison
        const normalizedLocale = normalizeLangTag(locale) || DEFAULT_LOCALE;
        const languageCode = normalizedLocale.split('-')[0]; // e.g., 'en' from 'en-us'

        console.log(`Browser TTS: Normalized locale: ${normalizedLocale}, language code: ${languageCode}`);

        // If we have a preferred voice, try to use it first
        if (preferredVoice) {
            const matchingVoice = this.voices.find(voice =>
                voice.name === preferredVoice ||
                voice.voiceURI === preferredVoice
            );

            if (matchingVoice) {
                this.voiceOptions.voice = matchingVoice;
                console.log(`Browser TTS: Using preferred voice: ${matchingVoice.name}`);
                return;
            }
        }

        // Voices exactly matching the locale (e.g., 'en-us')
        const exactLocaleVoices = this.voices.filter(voice =>
            normalizeLangTag(voice.lang) === normalizedLocale
        );
        console.log(`Browser TTS: Found ${exactLocaleVoices.length} exact locale matches for ${normalizedLocale}`);

        if (exactLocaleVoices.length > 0) {
            this.voiceOptions.voice = exactLocaleVoices[0];
            console.log(`Browser TTS: Using exact locale match for ${normalizedLocale}: ${this.voiceOptions.voice.name}`);
            return;
        }

        // Voices sharing the primary language (e.g., 'en')
        const languageVoices = this.voices.filter(voice =>
            primaryLanguage(voice.lang) === languageCode
        );
        console.log(`Browser TTS: Found ${languageVoices.length} language matches for ${languageCode}`);

        if (languageVoices.length > 0) {
            this.voiceOptions.voice = languageVoices[0];
            console.log(`Browser TTS: Using language match for ${languageCode}: ${this.voiceOptions.voice.name}`);
            return;
        }

        // If current language is not English and no matching voice found, try to find English voices
        if (languageCode !== 'en') {
            const englishVoices = this.voices.filter(voice =>
                primaryLanguage(voice.lang) === 'en'
            );
            console.log(`Browser TTS: Found ${englishVoices.length} English voices as fallback`);

            if (englishVoices.length > 0) {
                this.voiceOptions.voice = englishVoices[0];
                console.log(`Browser TTS: No ${languageCode} voice found, using English voice: ${this.voiceOptions.voice.name}`);
                return;
            }
        }

        // As a last resort, use any available voice
        if (this.voices.length > 0) {
            this.voiceOptions.voice = this.voices[0];
            console.log(`Browser TTS: No matching voice found, using first available voice: ${this.voiceOptions.voice.name}`);
        } else {
            console.log("Browser TTS: No voices available");
        }
    }

    /**
     * Preload speech for a text.
     *
     * The returned object is meant to be cached by the caller and passed to
     * speakPreloaded(). `url` is the object URL backing `audio`; whoever
     * evicts the entry can release it with `URL.revokeObjectURL(url)`.
     * @param {string} text - Text to preload
     * @returns {Promise<Object|null>} - `{ audio, blob, text, url }`, or null when
     *     the handler is unavailable, no voice is selected, or synthesis fails
     */
    async preloadSpeech(text) {
        if (!this.available || !text || !this.voiceOptions.voice) {
            return null;
        }

        try {
            // Process text for TTS
            const processedText = this.preprocessText(text);

            console.log(`Browser TTS: Preloading speech for: "${processedText.substring(0, 50)}${processedText.length > 50 ? '...' : ''}"`);

            const audioData = await this.synthesizeToWav(processedText);
            if (!audioData) {
                console.warn("Browser TTS: Failed to generate WAV audio");
                return null;
            }

            // Create audio element from blob
            const url = URL.createObjectURL(audioData.blob);
            const audio = new Audio(url);

            // Returned to the caller instead of being cached locally.
            return {
                audio: audio,
                blob: audioData.blob,
                text: processedText,
                url: url
            };
        } catch (error) {
            console.warn("Browser TTS: Error preloading speech:", error);
            return null;
        }
    }

    /**
     * Speak `text` through the Web Speech API while recording a
     * MediaRecorder stream, and resolve with the recorded blob.
     *
     * The blob carries the recorder's own MIME type when it reports one
     * (typically not WAV, despite the method name); 'audio/wav' is only the
     * fallback label.
     *
     * NOTE(review): nothing is ever connected to the MediaStreamDestination
     * created here, and the Web Speech API exposes no audio node to connect.
     * The recording therefore presumably contains silence while the utterance
     * itself plays through the speakers. Confirm, and consider speaking the
     * utterance directly instead of replaying a recording.
     * @param {string} text - Text to synthesize
     * @returns {Promise<Object>} - Resolves with `{ blob }`; rejects when capture is
     *     unsupported, the utterance errors, or synthesis does not start in time
     */
    synthesizeToWav(text) {
        return new Promise((resolve, reject) => {
            let utterance = null;
            let audioContext = null;
            let mediaRecorder = null;
            let startTimeout = null;
            let settled = false;

            // Release everything this call allocated. Browsers cap the number
            // of live AudioContexts, so the context must always be closed.
            const release = () => {
                clearTimeout(startTimeout);
                if (utterance) {
                    utterance.onstart = null;
                    utterance.onend = null;
                    utterance.onerror = null;
                }
                if (audioContext && typeof audioContext.close === 'function' && audioContext.state !== 'closed') {
                    const closing = audioContext.close();
                    if (closing && typeof closing.catch === 'function') {
                        closing.catch(error => console.warn('Browser TTS: Error closing audio context:', error));
                    }
                }
            };

            const succeed = (blob) => {
                if (settled) {
                    return;
                }
                settled = true;
                release();
                resolve({ blob });
            };

            const fail = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                if (mediaRecorder && mediaRecorder.state !== 'inactive') {
                    // Discard the partial recording instead of resolving with it.
                    mediaRecorder.onstop = null;
                    try {
                        mediaRecorder.stop();
                    } catch (stopError) {
                        console.warn('Browser TTS: Error stopping recorder:', stopError);
                    }
                }
                release();
                reject(error);
            };

            try {
                const AudioContextClass = window.AudioContext || window.webkitAudioContext;
                if (!AudioContextClass || typeof MediaRecorder === 'undefined') {
                    throw new Error('Audio capture (AudioContext/MediaRecorder) is not supported in this browser');
                }

                // Create utterance
                utterance = new SpeechSynthesisUtterance(text);

                // Set voice and options. Without a selected voice the browser default is used.
                const { voice, rate, pitch, volume } = this.voiceOptions;
                if (voice) {
                    utterance.voice = voice;
                    utterance.lang = voice.lang;
                }
                utterance.rate = rate;
                utterance.pitch = pitch;
                utterance.volume = volume;

                // Recording pipeline
                audioContext = new AudioContextClass();
                const destination = audioContext.createMediaStreamDestination();
                mediaRecorder = new MediaRecorder(destination.stream);
                const audioChunks = [];

                // Capture the audio chunks
                mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        audioChunks.push(event.data);
                    }
                };

                // When recording completes
                mediaRecorder.onstop = () => {
                    const type = mediaRecorder.mimeType || 'audio/wav';
                    succeed(new Blob(audioChunks, { type }));
                };

                // Set up speech synthesis events
                utterance.onstart = () => {
                    clearTimeout(startTimeout);
                    console.log("Browser TTS: Started synthesizing audio to WAV");
                    mediaRecorder.start();
                };

                utterance.onend = () => {
                    console.log("Browser TTS: Finished synthesizing audio to WAV");
                    if (mediaRecorder.state === 'inactive') {
                        fail(new Error("Synthesis ended before recording started"));
                        return;
                    }
                    mediaRecorder.stop();
                };

                utterance.onerror = (error) => {
                    console.error("Browser TTS: Error synthesizing audio:", error);
                    fail(error);
                };

                // Start the speech synthesis
                speechSynthesis.speak(utterance);

                // If synthesis doesn't start within a reasonable timeout, reject the promise
                startTimeout = setTimeout(() => {
                    console.warn("Browser TTS: Synthesis to WAV timed out");
                    fail(new Error("Synthesis timed out"));
                }, SYNTHESIS_START_TIMEOUT_MS);
            } catch (error) {
                console.error("Browser TTS: Error setting up WAV synthesis:", error);
                fail(error);
            }
        });
    }

    /**
     * Start playing `audio` and wire up the speak lifecycle: dispatches
     * 'tts:speak:start' now, then exactly one of 'tts:speak:end' or
     * 'tts:speak:error', invoking `callback` with the matching result.
     * A play() rejection (e.g. autoplay policy) is reported as an error.
     * @param {Object} audio - Audio element to play
     * @param {string} text - Text being spoken (included in dispatched events)
     * @param {Function} callback - Optional completion callback
     * @param {Function} onFinished - Optional cleanup run once when playback ends or fails
     */
    startPlayback(audio, text, callback = null, onFinished = null) {
        let finished = false;

        const finish = (eventName, detail, result) => {
            if (finished) {
                return;
            }
            finished = true;

            audio.onended = null;
            audio.onerror = null;
            // Only clear the reference if a newer playback has not replaced it.
            if (this.currentUtterance === audio) {
                this.currentUtterance = null;
            }
            if (onFinished) {
                onFinished();
            }

            this.dispatchEvent(eventName, detail);
            if (callback) {
                callback(result);
            }
        };

        const failPlayback = (error) => {
            finish(
                'tts:speak:error',
                { text, error: describePlaybackError(audio, error) },
                { success: false, reason: 'audio_error', error }
            );
        };

        // Dispatch start event
        this.dispatchEvent('tts:speak:start', { text });

        audio.onended = () => finish('tts:speak:end', { text }, { success: true });
        audio.onerror = failPlayback;

        // Store reference so stop() can pause it later
        this.currentUtterance = audio;

        // play() returns a promise in modern browsers; handle its rejection.
        const playing = audio.play();
        if (playing && typeof playing.catch === 'function') {
            playing.catch(error => {
                if (error && error.name === 'AbortError') {
                    // Paused (e.g. by stop()) before playback began - not a failure.
                    return;
                }
                failPlayback(error);
            });
        }
    }

    /**
     * Speak text using preloaded speech data from preloadSpeech().
     * @param {Object} preloadData - Preloaded speech data (`{ audio, text, ... }`)
     * @param {Function} callback - Callback for when speech completes
     * @returns {boolean} - True if playback was started
     */
    speakPreloaded(preloadData, callback = null) {
        if (!this.available || !preloadData || !preloadData.audio) {
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'no_preloaded_data' }), 0);
            }
            return false;
        }

        try {
            // Stop any current speech
            this.stop();

            const { audio, text } = preloadData;
            this.startPlayback(audio, text, callback);

            return true;
        } catch (error) {
            console.error("Browser TTS: Error playing preloaded speech:", error);

            // Dispatch error event
            this.dispatchEvent('tts:speak:error', {
                text: preloadData.text,
                error: error.message || 'Unknown error'
            });

            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'audio_error', error }), 0);
            }

            return false;
        }
    }

    /**
     * Speak text: synthesize it, then play the result.
     * @param {string} text - Text to speak
     * @param {Function} callback - Callback for when speech completes
     * @returns {Promise<boolean>} - Resolves true once playback has been started
     */
    async speak(text, callback = null) {
        if (!this.available || !text) {
            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
            }
            return false;
        }

        try {
            // Process text for TTS
            const processedText = this.preprocessText(text);

            const audioData = await this.synthesizeToWav(processedText);
            if (!audioData) {
                console.warn("Browser TTS: Failed to generate WAV audio");
                if (callback) {
                    setTimeout(() => callback({ success: false, reason: 'synthesis_error' }), 0);
                }
                return false;
            }

            // The object URL is owned by this call; release it when playback is over.
            const url = URL.createObjectURL(audioData.blob);
            const audio = new Audio(url);
            this.startPlayback(audio, processedText, callback, () => URL.revokeObjectURL(url));

            return true;
        } catch (error) {
            console.error("Browser TTS: Error speaking:", error);

            // Dispatch error event
            this.dispatchEvent('tts:speak:error', {
                text,
                error: error.message || 'Unknown error'
            });

            if (callback) {
                setTimeout(() => callback({ success: false, reason: 'synthesis_error', error }), 0);
            }

            return false;
        }
    }

    /**
     * Preprocess text for TTS: trim, collapse whitespace runs to single
     * spaces, and append a period when the text does not end in `.`, `!` or `?`.
     * @param {string} text - Text to preprocess
     * @returns {string} - Processed text ('' for empty input)
     */
    preprocessText(text) {
        if (!text) return '';

        const collapsed = String(text).trim().replace(/\s+/g, ' ');

        return /[.!?]$/.test(collapsed) ? collapsed : `${collapsed}.`;
    }

    /**
     * Stop speaking: halt the current playback and cancel any speech the
     * browser is still synthesizing or has queued.
     */
    stop() {
        const current = this.currentUtterance;
        if (current) {
            if (current.stop) {
                current.stop();
            } else if (current.pause) {
                current.pause();
            }
            this.currentUtterance = null;
        }

        // Synthesis plays through the speakers while it is being recorded,
        // so pausing the audio element alone cannot silence it.
        if (typeof speechSynthesis !== 'undefined' &&
            (speechSynthesis.speaking || speechSynthesis.pending)) {
            speechSynthesis.cancel();
        }
    }

    /**
     * Check if TTS is available
     * @returns {boolean} - True if the handler initialized and a voice is selected
     */
    isAvailable() {
        return this.available && this.voiceOptions.voice !== null;
    }

    /**
     * Get handler ID
     * @returns {string} - Handler ID
     */
    getId() {
        return this.id;
    }

    /**
     * Get available voices for the current locale.
     *
     * Returns voices whose primary language matches the current locale. If
     * there are none, falls back to English voices, then to all voices.
     * @returns {Array} - Array of `{ id, name, lang, gender }` objects
     */
    getVoices() {
        const localization = this.getModule('localization');
        const currentLocale =
            normalizeLangTag(localization ? localization.getLocale() : DEFAULT_LOCALE) || DEFAULT_LOCALE;
        const languageCode = currentLocale.split('-')[0]; // e.g., 'en' from 'en-us'

        let candidates = this.voices.filter(voice => primaryLanguage(voice.lang) === languageCode);

        if (candidates.length === 0 && languageCode !== 'en') {
            candidates = this.voices.filter(voice => primaryLanguage(voice.lang) === 'en');
        }

        if (candidates.length === 0) {
            candidates = this.voices;
        }

        return candidates.map(voice => ({
            id: voice.voiceURI,
            name: voice.name,
            lang: voice.lang,
            gender: this.inferVoiceGender(voice.name)
        }));
    }

    /**
     * Infer voice gender from name.
     *
     * Matches whole words of the name against gender terms and common first
     * names, so "Female" is not read as "male" and "German" not as "man".
     * Compound names such as "FemaleVoice" are still recognised.
     * @param {string} name - Voice name
     * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
     */
    inferVoiceGender(name) {
        const lowerName = String(name || '').toLowerCase();
        const tokens = lowerName.split(/[^a-z]+/).filter(Boolean);
        const hasToken = (terms) => tokens.some(token => terms.has(token));

        // Explicit gender terms
        if (hasToken(FEMALE_TERMS)) return 'female';
        if (hasToken(MALE_TERMS)) return 'male';

        // Compound names without separators; 'female' must be tested first
        // because it contains 'male'.
        if (lowerName.includes('female')) return 'female';
        if (lowerName.includes('male')) return 'male';

        // Common male/female voice names
        if (hasToken(MALE_NAMES)) return 'male';
        if (hasToken(FEMALE_NAMES)) return 'female';

        return 'unknown';
    }

    /**
     * Set voice options. Unknown voices and non-finite numbers are ignored;
     * numeric values are clamped to the ranges used by the Web Speech API.
     * @param {Object} options - Voice options
     * @param {string} [options.voice] - Voice URI or name
     * @param {number} [options.rate] - Speech rate, clamped to 0.1-10
     * @param {number} [options.pitch] - Pitch, clamped to 0-2
     * @param {number} [options.volume] - Volume, clamped to 0-1
     */
    setVoiceOptions(options = {}) {
        const clamp = (value, min, max) => Math.max(min, Math.min(max, value));
        const isUsableNumber = (value) => typeof value === 'number' && Number.isFinite(value);

        if (options.voice) {
            // Find the voice by ID or name
            const voice = this.voices.find(v =>
                v.voiceURI === options.voice ||
                v.name === options.voice
            );

            if (voice) {
                this.voiceOptions.voice = voice;
            }
        }

        if (isUsableNumber(options.rate)) {
            this.voiceOptions.rate = clamp(options.rate, 0.1, 10);
        }

        if (isUsableNumber(options.pitch)) {
            this.voiceOptions.pitch = clamp(options.pitch, 0, 2);
        }

        if (isUsableNumber(options.volume)) {
            this.voiceOptions.volume = clamp(options.volume, 0, 1);
        }
    }
}