// ai.interactive.fiction/public/js/browser-tts-module.js

/**
 * BrowserTTSModule for AI Interactive Fiction
 * Implementation using the browser's Web Speech API
 */
import { TTSHandlerModule } from './tts-handler-module.js';

/**
 * Browser TTS Module - text-to-speech handler backed by the browser's
 * Web Speech API (`window.speechSynthesis`).
 *
 * The module keeps a list of available voices in `this.voices`, picks one
 * for the active locale, and speaks text through `SpeechSynthesisUtterance`.
 * Rate, pitch, volume and the preferred voice are read from the
 * `persistence-manager` module; the active locale comes from the
 * `localization` module.
 */
export class BrowserTTSModule extends TTSHandlerModule {
    constructor() {
        super('browser', 'Browser TTS');

        // Voice options applied to every utterance
        this.voiceOptions = {
            voice: null, // Will be set during initialization
            rate: 1.0,
            pitch: 1.0,
            volume: 1.0
        };

        // State
        this.available = false;
        this.currentUtterance = null;
        // Start with an empty list so voice lookups are safe before loadVoices() has run
        this.voices = [];

        // Ensure dependencies are correctly defined from parent class
        // this.dependencies should already contain ['persistence-manager', 'localization']

        // Bind additional methods beyond those in TTSHandlerModule
        this.bindMethods([
            'onVoicesChanged',
            'loadVoices',
            'selectVoiceForLocale',
            'synthesizeToWav',
            'speakPreloaded',
            'speak',
            'preprocessText',
            'inferVoiceGender'
        ]);
    }

    /**
     * Initialize the browser TTS module: verify browser support, initialize
     * the parent, load voices, apply stored preferences and subscribe to
     * locale / voice-list changes.
     * @returns {Promise<boolean>} - Resolves with success status (never rejects)
     */
    async initialize() {
        try {
            this.reportProgress(10, 'Initializing Browser TTS');

            // Check for browser support
            if (!window.speechSynthesis) {
                console.error('Browser TTS: Speech synthesis not available in this browser');
                return false;
            }

            this.reportProgress(30, 'Browser TTS supported');

            // Initialize parent
            const parentInit = await super.initialize();
            if (!parentInit) {
                console.error('Browser TTS: Parent initialization failed');
                return false;
            }

            // Get required dependencies
            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error('Browser TTS: Required dependency persistence-manager not found');
                return false;
            }

            const localization = this.getModule('localization');
            if (!localization) {
                console.error('Browser TTS: Required dependency localization not found');
                return false;
            }

            // Load voices
            const voicesLoaded = await this.loadVoices();
            if (!voicesLoaded) {
                console.error('Browser TTS: Failed to load voices');
                return false;
            }

            // Set speech options from preferences
            this.voiceOptions.rate = persistenceManager.getPreference('tts', 'rate', 1.0);
            this.voiceOptions.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
            this.voiceOptions.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
            const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');

            // Set voice based on current locale
            const currentLocale = localization.getLocale() || 'en-us';
            await this.selectVoiceForLocale(currentLocale, preferredVoice);

            // Listen for locale changes. The handler is not awaited by the DOM,
            // so failures are caught here instead of becoming unhandled rejections.
            document.addEventListener('locale:changed', (event) => {
                if (event.detail && event.detail.locale) {
                    this.selectVoiceForLocale(event.detail.locale).catch((error) => {
                        console.error('Browser TTS: Failed to select voice for new locale:', error);
                    });
                }
            });

            // Listen for voices changed events
            if (window.speechSynthesis.onvoiceschanged !== undefined) {
                window.speechSynthesis.onvoiceschanged = this.onVoicesChanged;
            }

            this.isReady = true;
            this.available = true;
            this.reportProgress(100, 'Browser TTS initialized');

            return true;
        } catch (error) {
            console.error('Browser TTS: Initialization error:', error);
            this.isReady = false;
            this.available = false;
            return false;
        }
    }

    /**
     * Handle the browser's voices-changed event: reload the voice list and
     * re-select a voice for the current locale and stored preference.
     * Errors are logged rather than thrown because the browser does not
     * await event handlers.
     * @returns {Promise<void>}
     */
    async onVoicesChanged() {
        try {
            await this.loadVoices();

            // Re-select voice based on current locale
            const localization = this.getModule('localization');
            const persistenceManager = this.getModule('persistence-manager');

            if (localization && persistenceManager) {
                const currentLocale = localization.getLocale() || 'en-us';
                const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');
                await this.selectVoiceForLocale(currentLocale, preferredVoice);
            }
        } catch (error) {
            console.error('Browser TTS: Error handling voices changed event:', error);
        }
    }

    /**
     * Wait for the next voices-changed notification, or give up after a timeout.
     * The `onvoiceschanged` handler that was installed before the wait is put
     * back afterwards, so a long-lived handler is not replaced permanently.
     * @param {number} timeoutMs - Maximum time to wait in milliseconds
     * @returns {Promise<Array>} - Resolves with the voice list (empty on timeout)
     */
    waitForVoices(timeoutMs = 3000) {
        const synth = window.speechSynthesis;
        const previousHandler = synth.onvoiceschanged;

        return new Promise((resolve) => {
            let settled = false;
            let timer = null;

            const finish = (voices) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timer);
                synth.onvoiceschanged = previousHandler;
                resolve(voices || []);
            };

            // Set a timeout in case voices never load
            timer = setTimeout(() => {
                console.warn('Browser TTS: Timeout waiting for voices');
                // Resolve with empty array instead of rejecting
                finish([]);
            }, timeoutMs);

            // Temporary one-shot handler
            synth.onvoiceschanged = () => {
                const loadedVoices = synth.getVoices();
                console.log(`Browser TTS: Voices loaded, found ${loadedVoices.length} voices`);
                finish(loadedVoices);
            };
        });
    }

    /**
     * Load available voices from the speech synthesis API into `this.voices`.
     * When the list is empty at first, waits up to 3 seconds for the browser
     * to announce voices. If there are still none, a single placeholder entry
     * (flagged with `isPlaceholder`) is stored so the list is never empty.
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async loadVoices() {
        try {
            this.reportProgress(40, 'Loading browser voices');

            // Try to get voices
            let voices = window.speechSynthesis.getVoices();

            // If voices array is empty, wait for the voices-changed event
            if (!voices || voices.length === 0) {
                console.log('Browser TTS: No voices available immediately, waiting for voices to load...');
                voices = await this.waitForVoices(3000);
            }

            // Store voices
            this.voices = voices || [];

            // Log available voices for debugging
            console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
            if (this.voices.length > 0) {
                console.log('Browser TTS: First few voices:', this.voices.slice(0, 3));
            }

            // If no voices available but speech synthesis is supported, still return true
            // Some browsers may not expose voices but still support speech synthesis
            if (this.voices.length === 0) {
                console.warn('Browser TTS: No voices available, but continuing with default voice');
                // Placeholder entry: a plain object, not a real browser voice.
                // buildUtterance() skips it and lets the browser use its default.
                this.voices = [{
                    default: true,
                    lang: 'en-US',
                    localService: true,
                    name: 'Default Voice',
                    voiceURI: 'default',
                    isPlaceholder: true
                }];
            }

            this.reportProgress(60, 'Browser voices loaded');
            return true;
        } catch (error) {
            console.error('Browser TTS: Error loading voices:', error);
            return false;
        }
    }

    /**
     * Normalize a locale / language tag for comparison: lower-case, with
     * every underscore replaced by a hyphen (e.g. 'en_US' -> 'en-us').
     * @param {string} value - Tag to normalize
     * @param {string} fallback - Value used when `value` is empty or missing
     * @returns {string} - Normalized tag
     */
    normalizeLocaleTag(value, fallback = '') {
        return String(value || fallback).toLowerCase().replace(/_/g, '-');
    }

    /**
     * Set voice based on locale. Candidates are tried in this order:
     * the preferred voice (by name or voiceURI), a voice whose language tag
     * equals the locale, a voice with the same primary language subtag, and
     * finally the first voice in the list.
     * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
     * @param {string} preferredVoice - Optional preferred voice name
     * @returns {Promise<boolean>} - Success status (false when no voice exists)
     */
    async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
        const voices = this.voices || [];
        const normalizedLocale = this.normalizeLocaleTag(locale, 'en-us');
        const languageCode = normalizedLocale.split('-')[0];

        let selected = null;

        // First try to use the preferred voice if specified
        if (preferredVoice) {
            selected = voices.find(v =>
                v.name === preferredVoice ||
                v.voiceURI === preferredVoice
            );
        }

        // Try to find a voice that matches the exact locale
        if (!selected) {
            selected = voices.find(v =>
                this.normalizeLocaleTag(v.lang) === normalizedLocale
            );
        }

        // Try to find a voice with the same primary language subtag.
        // Comparing whole subtags keeps 'fi' from matching 'fil-PH'.
        if (!selected) {
            selected = voices.find(v =>
                this.normalizeLocaleTag(v.lang).split('-')[0] === languageCode
            );
        }

        // Fallback to the first available voice
        if (!selected && voices.length > 0) {
            selected = voices[0];
        }

        // No voices available
        if (!selected) {
            return false;
        }

        this.voiceOptions.voice = selected;
        return true;
    }

    /**
     * Create an utterance for the given text with the current voice options.
     * The placeholder voice is never assigned: it is a plain object, and the
     * utterance's `voice` property only accepts real browser voices.
     * @param {string} text - Raw text (preprocessed here)
     * @returns {SpeechSynthesisUtterance} - Configured utterance
     */
    buildUtterance(text) {
        const utterance = new SpeechSynthesisUtterance(this.preprocessText(text));
        const { voice, rate, pitch, volume } = this.voiceOptions;

        if (voice && !voice.isPlaceholder) {
            utterance.voice = voice;
        }

        utterance.rate = rate;
        utterance.pitch = pitch;
        utterance.volume = volume;

        return utterance;
    }

    /**
     * Reset the speaking state, but only if the given utterance is still the
     * current one. Events from an utterance that was already replaced or
     * stopped must not clear the state of its successor.
     * @param {SpeechSynthesisUtterance} utterance - Utterance that finished or failed
     */
    clearUtteranceState(utterance) {
        if (this.currentUtterance === utterance) {
            this.isSpeaking = false;
            this.currentUtterance = null;
        }
    }

    /**
     * Speak text, cancelling any speech already in progress.
     * @param {string} text - Text to speak
     * @param {Function} callback - Called with `{ success: true }` on completion or
     *     `{ success: false, reason, error? }` on failure
     * @returns {boolean} - True when the utterance was handed to the browser
     */
    speak(text, callback = null) {
        const notify = (result) => {
            if (typeof callback === 'function') {
                callback(result);
            }
        };

        if (!this.isReady || !window.speechSynthesis) {
            notify({ success: false, reason: 'not_ready' });
            return false;
        }

        // Stop any ongoing speech
        this.stop();

        const utterance = this.buildUtterance(text);

        // Set up event handlers
        utterance.onend = () => {
            this.clearUtteranceState(utterance);
            notify({ success: true });
        };

        utterance.onerror = (error) => {
            this.clearUtteranceState(utterance);
            console.error('Browser TTS: Speech error', error);
            notify({ success: false, reason: 'synthesis_error', error });
        };

        // Store current utterance (also keeps a reference to it while it is spoken)
        this.currentUtterance = utterance;
        this.isSpeaking = true;

        // Start speaking
        window.speechSynthesis.speak(utterance);

        return true;
    }

    /**
     * Preload speech for a text
     * @param {string} text - Text to preload
     * @returns {Promise<Object>} - `{ success: true, audioData, text, duration }` or
     *     `{ success: false, reason }`
     */
    async preloadSpeech(text) {
        if (!this.isReady || !window.speechSynthesis) {
            return { success: false, reason: 'not_ready' };
        }

        // Generate audio data
        const wavResult = await this.synthesizeToWav(text);

        if (!wavResult.success) {
            return { success: false, reason: 'synthesis_failed' };
        }

        return {
            success: true,
            audioData: wavResult.audioData,
            text,
            duration: wavResult.duration || 0
        };
    }

    /**
     * Speak the text while a MediaRecorder runs, and return the recorded bytes.
     *
     * NOTE(review): nothing in this method connects an audio source to the
     * recorder's destination node, so the captured data presumably does not
     * contain the spoken audio - confirm before relying on `audioData`.
     * The recorder chooses the container format; despite the method name the
     * bytes are not guaranteed to be WAV.
     *
     * The returned promise never rejects; every failure resolves with
     * `{ success: false, reason }`.
     * @param {string} text - Text to synthesize
     * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` or
     *     `{ success: false, reason }`
     */
    async synthesizeToWav(text) {
        if (!this.isReady || !window.speechSynthesis) {
            return { success: false, reason: 'not_ready' };
        }

        const AudioContextClass = window.AudioContext || window.webkitAudioContext;
        if (!AudioContextClass) {
            return { success: false, reason: 'no_audio_context' };
        }

        if (typeof MediaRecorder === 'undefined') {
            return { success: false, reason: 'no_media_recorder' };
        }

        // Best-effort release of the audio context; a failure here must not
        // change the result that is reported to the caller.
        const releaseContext = (context) => {
            if (context && typeof context.close === 'function') {
                const closing = context.close();
                if (closing && typeof closing.catch === 'function') {
                    closing.catch(() => {});
                }
            }
        };

        let audioContext = null;
        let mediaRecorder = null;
        try {
            audioContext = new AudioContextClass();
            const destination = audioContext.createMediaStreamDestination();
            mediaRecorder = new MediaRecorder(destination.stream);
        } catch (error) {
            console.error('Browser TTS: Unable to set up audio recording', error);
            releaseContext(audioContext);
            return { success: false, reason: 'recorder_setup_failed' };
        }

        const utterance = this.buildUtterance(text);

        return new Promise((resolve) => {
            const audioChunks = [];
            let settled = false;
            let watchdog = null;

            // Resolve exactly once and release the timer and the audio context
            const settle = (result) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(watchdog);
                releaseContext(audioContext);
                resolve(result);
            };

            // stop() throws on an inactive recorder, so check the state first
            const stopRecorder = () => {
                if (mediaRecorder.state !== 'inactive') {
                    mediaRecorder.stop();
                }
            };

            mediaRecorder.ondataavailable = (event) => {
                if (event.data.size > 0) {
                    audioChunks.push(event.data);
                }
            };

            mediaRecorder.onstop = () => {
                if (settled) {
                    return;
                }

                // Label the blob with the recorder's own format
                const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });

                // Convert blob to array buffer
                const reader = new FileReader();
                reader.onload = () => {
                    settle({
                        success: true,
                        audioData: reader.result
                    });
                };

                reader.onerror = () => {
                    settle({ success: false, reason: 'blob_read_error' });
                };

                reader.readAsArrayBuffer(audioBlob);
            };

            // Set up completion handling
            utterance.onend = () => {
                stopRecorder();
            };

            utterance.onerror = (error) => {
                console.error('Browser TTS: Synthesis error', error);
                stopRecorder();
                settle({ success: false, reason: 'synthesis_error' });
            };

            try {
                // Start recording, then start speaking
                mediaRecorder.start();
                window.speechSynthesis.speak(utterance);
            } catch (error) {
                console.error('Browser TTS: Synthesis error', error);
                stopRecorder();
                settle({ success: false, reason: 'synthesis_error' });
                return;
            }

            // Set timeout in case onend never fires
            watchdog = setTimeout(stopRecorder, 30000); // 30-second timeout
        });
    }

    /**
     * Speak preloaded audio data
     * @param {Object} preloadedData - Data from preloadSpeech
     * @param {Function} callback - Callback for when speech completes; also called
     *     with `{ success: false, reason: 'invalid_data' }` when the data is unusable
     * @returns {boolean} - Success status
     */
    speakPreloaded(preloadedData, callback = null) {
        if (!preloadedData || !preloadedData.text) {
            console.error('Browser TTS: Invalid preloaded data');
            if (typeof callback === 'function') {
                callback({ success: false, reason: 'invalid_data' });
            }
            return false;
        }

        // For browser TTS, we don't use the preloaded data directly
        // Instead, we just speak the text again
        return this.speak(preloadedData.text, callback);
    }

    /**
     * Preprocess text for TTS: strip HTML tags, decode a few common HTML
     * entities, spell out remaining ampersands and collapse whitespace.
     * @param {string} text - Text to preprocess (null/undefined is treated as empty)
     * @returns {string} - Processed text
     */
    preprocessText(text) {
        const entityText = {
            nbsp: ' ',
            amp: '&',
            lt: '<',
            gt: '>',
            quot: '"',
            apos: "'",
            '#39': "'"
        };

        let result = text === null || text === undefined ? '' : String(text);

        // Remove HTML tags
        result = result.replace(/<[^>]*>/g, ' ');

        // Decode entities in a single pass, before ampersands are spelled out,
        // so '&amp;' is read as "and" rather than "and amp;"
        result = result.replace(/&(nbsp|amp|lt|gt|quot|apos|#39);/g, (match, entity) => entityText[entity]);

        // Replace special characters with their spoken equivalents
        result = result.replace(/&/g, ' and ');

        // Normalize whitespace
        return result.replace(/\s+/g, ' ').trim();
    }

    /**
     * Stop speaking
     * @returns {boolean} - Success status (false when speech synthesis is unavailable)
     */
    stop() {
        if (window.speechSynthesis) {
            window.speechSynthesis.cancel();
            this.isSpeaking = false;
            this.currentUtterance = null;
            return true;
        }
        return false;
    }

    /**
     * Get available voices as plain descriptors. Voices whose primary
     * language subtag matches the current locale are returned; when none
     * match, every known voice is returned instead.
     * @returns {Promise<Array>} - Array of `{ id, name, lang, gender }` objects
     *     (empty while the module is not ready)
     */
    async getVoices() {
        if (!this.isReady) {
            return [];
        }

        const localization = this.getModule('localization');
        // Same 'en-us' fallback as initialize() and onVoicesChanged()
        const currentLocale = (localization && localization.getLocale()) || 'en-us';
        const languageCode = this.normalizeLocaleTag(currentLocale, 'en-us').split('-')[0];

        const voices = this.voices || [];

        // Filter voices by the language of the current locale
        const matchingVoices = voices.filter(voice =>
            this.normalizeLocaleTag(voice.lang).split('-')[0] === languageCode
        );

        // If no matching voices found, return all voices
        const listedVoices = matchingVoices.length > 0 ? matchingVoices : voices;

        return listedVoices.map(voice => ({
            id: voice.voiceURI,
            name: voice.name,
            lang: voice.lang,
            gender: this.inferVoiceGender(voice.name)
        }));
    }

    /**
     * Infer voice gender from name by looking for whole-word gender terms.
     * Whole words are compared (not substrings) so that "Female", "Woman"
     * or "Samantha" are not mistaken for the male terms they contain.
     * @param {string} name - Voice name
     * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
     */
    inferVoiceGender(name) {
        const words = String(name || '')
            .toLowerCase()
            .split(/[^a-z]+/)
            .filter(Boolean);

        // Common terms indicating gender
        const maleTerms = ['male', 'man', 'guy', 'boy', 'mr', 'sir'];
        const femaleTerms = ['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss'];

        if (words.some(word => maleTerms.includes(word))) {
            return 'male';
        }

        if (words.some(word => femaleTerms.includes(word))) {
            return 'female';
        }

        return 'unknown';
    }
}

// Self-registration: when this file is loaded, hand one BrowserTTSModule
// instance to the shared registry.
// The registry is looked up on window on purpose; it must not be imported.
if (!window.moduleRegistry) {
    // Nothing to register with - report it and leave the module unregistered
    console.error('Module registry not available when attempting to register Browser TTS Module');
} else {
    try {
        // The instance is constructed before register() is called
        window.moduleRegistry.register(new BrowserTTSModule());
        console.log('Browser TTS Module registered successfully');
    } catch (registrationError) {
        console.error('Failed to register Browser TTS Module:', registrationError);
    }
}