/**
 * Text-to-Speech Handler for AI Interactive Fiction
 * Enhanced version with improved voice selection, caching, and playback controls
 *
 * Text passed to `speak()` is cleaned of markup, split into sentences and fed
 * to the browser's `speechSynthesis` one sentence at a time through an
 * internal queue. The handler is inert (every method is a safe no-op) when the
 * Web Speech API is not available, so the module can be imported anywhere.
 */
export class TTSHandler {
  constructor() {
    this.enabled = false;
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.voiceCache = [];
    this.preferredVoice = null;
    this.audioCache = new Map(); // Cache for audio segments
    this.currentSpeed = 1.0;
    this.hasUserActivation = false;
    this.permissionError = false;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;

    // Flag to track when we're deliberately stopping speech
    this.intentionalStop = false;
    // Timer that clears `intentionalStop`; kept so repeated stops restart it
    this.intentionalStopTimer = null;
    // Bumped on every stop(); handlers of older utterances compare against it
    // so they never advance a queue that no longer belongs to them
    this.speechSession = 0;
    // Pending "synth still busy" retry scheduled by processSpeakQueue()
    this.queuePollTimer = null;
    // Interval id of the stuck-speech watchdog (see dispose())
    this.watchdogTimer = null;
    this.synth = null;

    // `window` does not exist outside a browser (SSR, Node, workers); the
    // typeof guard keeps the import-time singleton from throwing there.
    if (typeof window === 'undefined' || !('speechSynthesis' in window)) {
      console.warn("Text-to-speech functionality not available in this browser.");
      return;
    }

    this.synth = window.speechSynthesis;

    // Load voices when they become available
    const initialVoices = this.synth.getVoices();
    if (initialVoices.length > 0) {
      this.voiceCache = initialVoices;
      this.selectPreferredVoice();
    }

    this.synth.onvoiceschanged = () => {
      this.voiceCache = this.synth.getVoices();
      this.selectPreferredVoice();
      console.log("Voices loaded:", this.voiceCache.length);
    };

    // Set up periodic check to detect and fix stuck speech
    this.watchdogTimer = setInterval(() => {
      // If we think we're speaking but the browser doesn't, reset state
      if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
        console.log("Detected stuck speech state, resetting");
        this.speaking = false;

        // Try to continue the queue if there are more items
        if (this.speakQueue.length > 0) {
          this.processSpeakQueue();
        }
      }
    }, 1000);
  }

  /**
   * Select the preferred voice based on language and quality.
   * Tries a fixed list of voice names in order, then any voice whose language
   * tag starts with "en", then the first cached voice. Leaves
   * `preferredVoice` untouched when no voices are cached.
   */
  selectPreferredVoice() {
    // Prefer high-quality voices - ordered by preference
    const preferredVoiceNames = [
      "Google UK English Female",
      "Microsoft Hazel Desktop",
      "Microsoft Susan",
      "Daniel",
      "Karen",
    ];

    // Debug: Print all available voices
    console.log(
      "Available voices:",
      this.voiceCache.map((v) => `${v.name} (${v.lang})`).join(", ")
    );

    // Try to find one of our preferred voices
    for (const name of preferredVoiceNames) {
      const voice = this.voiceCache.find((v) => v.name === name);
      if (voice) {
        this.preferredVoice = voice;
        console.log("Selected preferred voice:", name);
        return;
      }
    }

    // Fall back to any English voice if preferred not found
    const englishVoice = this.voiceCache.find((v) => v.lang.startsWith('en'));
    if (englishVoice) {
      this.preferredVoice = englishVoice;
      console.log("Selected English voice:", englishVoice.name);
      return;
    }

    // Last resort: use the first available voice
    if (this.voiceCache.length > 0) {
      this.preferredVoice = this.voiceCache[0];
      console.log("Selected fallback voice:", this.voiceCache[0].name);
    }
  }

  /**
   * Toggle TTS functionality on/off.
   * Calling this marks the handler as user-activated and clears any earlier
   * permission error. Enabling speaks a very quiet test utterance so the
   * browser can surface a permission problem; disabling stops all speech.
   * @returns {boolean} New state of TTS (enabled/disabled); always false when
   *   speech synthesis is unavailable
   */
  toggle() {
    if (!this.synth) return false;

    // Set user activation flag when toggle is called
    this.hasUserActivation = true;

    // Clear permission error on toggle
    this.permissionError = false;

    this.enabled = !this.enabled;
    console.log("TTS toggled:", this.enabled ? "ON" : "OFF");

    if (!this.enabled) {
      // Stop unconditionally: sentences may be queued even though `speaking`
      // has not been set yet (it only flips in the utterance's onstart).
      this.stop();
      return this.enabled;
    }

    // Try a test utterance to request permissions
    try {
      // Reset any current utterance and queue first
      this.stop();

      const testUtterance = new SpeechSynthesisUtterance("Hello");
      testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
      testUtterance.rate = 1.0;

      // Handle any errors that might occur
      testUtterance.onerror = (event) => {
        console.warn("Permission error for TTS:", event);
        if (event.error === "not-allowed") {
          this.permissionError = true;
          this.enabled = false;
          alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
        }
      };

      // Try to speak the test utterance
      this.synth.speak(testUtterance);
    } catch (e) {
      console.error("Failed to initialize TTS:", e);
    }

    return this.enabled;
  }

  /**
   * Set the speech rate/speed.
   * The value is clamped to the range 0.1 - 2.0. A value that is not a number
   * is ignored so the utterance rate can never become NaN.
   * @param {number} speed - Speed multiplier (0.1 to 2.0)
   */
  setSpeed(speed) {
    const requested = Number(speed);
    if (Number.isNaN(requested)) {
      console.warn("Ignoring invalid TTS speed:", speed);
      return;
    }
    this.currentSpeed = Math.max(0.1, Math.min(2.0, requested));
  }

  /**
   * Process text for better speech synthesis.
   * Strips markdown bold/italic markers, reduces markdown links to their
   * label and removes HTML tags.
   * @param {string} text - Text to process
   * @returns {string} - Processed text ("" for empty input)
   */
  processTextForSpeech(text) {
    if (!text) return "";

    return String(text)
      // Remove markdown/formatting that would sound strange when read
      .replace(/\*\*([^*]+)\*\*/g, '$1') // Bold
      .replace(/\*([^*]+)\*/g, '$1') // Italic
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // Links
      // Clean up any HTML tags
      .replace(/<[^>]+>/g, '');
  }

  /**
   * Split text into sentences for better speech handling.
   * Each sentence keeps its terminator; trailing text without a terminator is
   * kept as a final fragment. Sentences longer than 150 characters that
   * contain commas are further split at the commas. Whitespace-only fragments
   * are dropped.
   * @param {string} text - Text to split
   * @returns {string[]} - Array of sentences (empty when nothing is speakable)
   */
  splitIntoSentences(text) {
    if (!text) return [];

    // First alternative: a sentence with its terminator(s).
    // Second alternative: whatever is left at the end without a terminator,
    // which would otherwise be silently dropped.
    const sentenceRegex = /[^.!?]+[.!?]+|[^.!?]+$/g;
    const sentences = text.match(sentenceRegex) || [text];

    const result = [];
    for (const sentence of sentences) {
      if (sentence.length > 150 && sentence.includes(',')) {
        // Split long sentences at commas, re-attaching the comma to every
        // part except the last
        const parts = sentence.split(/,\s*/);
        parts.forEach((part, index) => {
          result.push(index < parts.length - 1 ? `${part},` : part);
        });
      } else {
        result.push(sentence);
      }
    }

    return result.filter((fragment) => fragment.trim() !== '');
  }

  /**
   * Speak a single utterance with proper configuration.
   * When the utterance ends or errors, `onEndCallback` is invoked exactly
   * once and the queue is advanced, unless a stop()/speak() has started a
   * newer session in the meantime (the callback is still invoked then, but
   * the queue is left to the newer session).
   * @param {string} text - Text to speak
   * @param {function} onEndCallback - Callback to execute when finished
   * @private
   */
  speakUtterance(text, onEndCallback) {
    const session = this.speechSession;
    let finished = false;

    const finish = () => {
      if (finished) return; // guard against more than one end/error signal
      finished = true;
      if (session === this.speechSession) {
        this.utterance = null;
      }
      if (onEndCallback) onEndCallback();
      // Re-check after the callback: it may itself have called speak()/stop()
      if (session === this.speechSession) {
        this.processSpeakQueue();
      }
    };

    if (!text || text.trim() === '') {
      finish();
      return;
    }

    try {
      const utterance = new SpeechSynthesisUtterance(text);

      if (this.preferredVoice) {
        utterance.voice = this.preferredVoice;
        console.log("Using voice:", this.preferredVoice.name);
      }

      utterance.rate = this.currentSpeed;
      utterance.pitch = 1.0;
      utterance.volume = 1.0;

      utterance.onstart = () => {
        if (session !== this.speechSession) return;
        this.speaking = true;
        console.log("TTS started speaking:", `${text.substring(0, 30)}...`);
      };

      utterance.onend = () => {
        console.log("TTS finished speaking utterance");
        finish();
      };

      utterance.onerror = (event) => {
        // Don't treat interrupted errors as real errors when we're deliberately stopping
        if (event.error === "interrupted" && this.intentionalStop) {
          console.log("Speech intentionally interrupted");
        } else {
          console.error("Speech synthesis error:", event);
          if (event.error === "not-allowed") {
            this.permissionError = true;
            this.enabled = false;
          }
        }
        finish();
      };

      // Hold a reference for as long as the utterance is in flight
      this.utterance = utterance;

      // Actually speak
      this.synth.speak(utterance);

      // Workaround for Chrome bug where speech synthesis gets stuck
      if (!this.synth.speaking) {
        this.synth.pause();
        this.synth.resume();
      }
    } catch (e) {
      console.error("Error in speakUtterance:", e);
      finish();
    }
  }

  /**
   * Process the next item in the speak queue.
   * Clears the speaking flags when the queue is empty. If the synthesizer is
   * still busy, a single retry is scheduled 100 ms later instead of
   * overlapping sentences.
   * @private
   */
  processSpeakQueue() {
    if (this.speakQueue.length === 0) {
      this.isSpeakingFromQueue = false;
      this.speaking = false;
      return;
    }

    // Skip processing if we're already speaking (prevent overlapping sentences)
    if (this.synth.speaking) {
      // At most one pending retry; stop() cancels it
      if (this.queuePollTimer === null) {
        this.queuePollTimer = setTimeout(() => {
          this.queuePollTimer = null;
          this.processSpeakQueue();
        }, 100);
      }
      return;
    }

    this.isSpeakingFromQueue = true;
    const queueItem = this.speakQueue.shift();
    console.log(
      `Speaking queue item (${this.speakQueue.length} remaining):`,
      `${queueItem.text.substring(0, 30)}...`
    );
    this.speakUtterance(queueItem.text, queueItem.callback);
  }

  /**
   * Speak the provided text by queueing sentences.
   * Any speech in progress is stopped first. `onEndCallback` is invoked
   * immediately when nothing will be spoken (TTS unavailable or disabled,
   * empty text, permission error, no user activation, or text that is empty
   * after markup removal); otherwise it is attached to the last sentence.
   * @param {string} text - Text to be spoken
   * @param {function} onEndCallback - Callback when all speech ends
   */
  speak(text, onEndCallback = null) {
    if (!this.synth || !this.enabled || !text) {
      if (onEndCallback) onEndCallback();
      return;
    }

    // Don't attempt to speak if there's been a permission error
    if (this.permissionError) {
      console.warn("Not attempting to speak due to permission error");
      if (onEndCallback) onEndCallback();
      return;
    }

    // Don't attempt to speak without user activation
    if (!this.hasUserActivation) {
      console.warn("Not attempting to speak because there hasn't been user interaction yet");
      if (onEndCallback) onEndCallback();
      return;
    }

    // Process text for better speech
    const processedText = this.processTextForSpeech(text);
    console.log("TTS attempting to speak:", `${processedText.substring(0, 50)}...`);

    // Stop any existing speech
    this.stop();

    // Split into sentences for better handling
    const sentences = this.splitIntoSentences(processedText);

    // Nothing speakable is left (e.g. the text was only markup): report
    // completion right away instead of silently dropping the callback.
    if (sentences.length === 0) {
      if (onEndCallback) onEndCallback();
      return;
    }

    // Last sentence gets the callback
    sentences.forEach((sentence, index) => {
      this.speakQueue.push({
        text: sentence,
        callback: index === sentences.length - 1 ? onEndCallback : null,
      });
    });

    // stop() above left the queue idle, so start processing now
    this.processSpeakQueue();
  }

  /**
   * Pause the current speech
   */
  pause() {
    if (!this.synth || !this.speaking) return;

    this.synth.pause();
    this.paused = true;
  }

  /**
   * Resume paused speech
   */
  resume() {
    if (!this.synth || !this.paused) return;

    this.synth.resume();
    this.paused = false;
  }

  /**
   * Stop the current speech and discard everything still queued.
   * Callbacks attached to queued sentences that were never handed to the
   * synthesizer are discarded along with the queue.
   */
  stop() {
    if (!this.synth) return;

    // Set flag to indicate this is an intentional stop before canceling
    this.intentionalStop = true;

    // Invalidate handlers and retries that belong to the speech being stopped
    this.speechSession += 1;
    clearTimeout(this.queuePollTimer);
    this.queuePollTimer = null;

    // Cancel any current speech synthesis
    this.synth.cancel();

    // Reset state
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;

    // Reset the intentional stop flag after a short delay. Restart the timer
    // on every stop so an earlier stop cannot clear the flag too early.
    clearTimeout(this.intentionalStopTimer);
    this.intentionalStopTimer = setTimeout(() => {
      this.intentionalStop = false;
      this.intentionalStopTimer = null;
    }, 100);
  }

  /**
   * Release the timers and event handler installed by the constructor and
   * stop any speech. Intended for instances that are being thrown away.
   */
  dispose() {
    if (this.watchdogTimer !== null) {
      clearInterval(this.watchdogTimer);
      this.watchdogTimer = null;
    }
    if (this.synth) {
      this.stop();
      this.synth.onvoiceschanged = null;
    }
  }

  /**
   * Check if TTS is currently active/enabled
   * @returns {boolean} True when enabled and no permission error occurred
   */
  isEnabled() {
    return this.enabled && !this.permissionError;
  }

  /**
   * Check if speech is currently in progress
   * @returns {boolean} Current value of the speaking flag
   */
  isSpeaking() {
    return this.speaking;
  }
}

// Create and export a singleton instance
export const browserTtsHandler = new TTSHandler();