From 113e3b995d0135eae00e64655c4ef52413afc466 Mon Sep 17 00:00:00 2001 From: Georg Tomitsch Date: Tue, 1 Apr 2025 10:53:27 +0200 Subject: [PATCH] Fix TTS handler to handle longer texts by breaking them into sentence queues --- public/js/tts-handler.js | 266 +++++++++++++++++++++++++++++++-------- 1 file changed, 215 insertions(+), 51 deletions(-) diff --git a/public/js/tts-handler.js b/public/js/tts-handler.js index e3eedf7..70e3f9a 100644 --- a/public/js/tts-handler.js +++ b/public/js/tts-handler.js @@ -13,6 +13,10 @@ class TTSHandler { this.preferredVoice = null; this.audioCache = new Map(); // Cache for audio segments this.currentSpeed = 1.0; + this.hasUserActivation = false; + this.permissionError = false; + this.speakQueue = []; + this.isSpeakingFromQueue = false; // Initialize if speech synthesis is available if ('speechSynthesis' in window) { @@ -27,10 +31,25 @@ class TTSHandler { this.synth.onvoiceschanged = () => { this.voiceCache = this.synth.getVoices(); this.selectPreferredVoice(); + console.log("Voices loaded:", this.voiceCache.length); }; - // Enable by default if available - this.enabled = true; + // Disabled by default until user activates it + this.enabled = false; + + // Set up periodic check to detect and fix stuck speech + setInterval(() => { + // If we think we're speaking but the browser doesn't, reset state + if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) { + console.log("Detected stuck speech state, resetting"); + this.speaking = false; + + // Try to continue the queue if there are more items + if (this.speakQueue.length > 0) { + this.processSpeakQueue(); + } + } + }, 1000); } else { console.warn("Text-to-speech functionality not available in this browser."); } @@ -49,11 +68,15 @@ class TTSHandler { "Karen" ]; + // Debug: Print all available voices + console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", ")); + // Try to find one of our preferred voices for (const name of preferredVoiceNames) { const voice = this.voiceCache.find(v => v.name === name); if (voice) { this.preferredVoice = voice; + console.log("Selected preferred voice:", name); return; } } @@ -62,28 +85,68 @@ class TTSHandler { const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en')); if (englishVoice) { this.preferredVoice = englishVoice; + console.log("Selected English voice:", englishVoice.name); return; } // Last resort: use the first available voice if (this.voiceCache.length > 0) { this.preferredVoice = this.voiceCache[0]; + console.log("Selected fallback voice:", this.voiceCache[0].name); } } /** * Toggle TTS functionality on/off + * @returns {boolean} New state of TTS (enabled/disabled) */ toggle() { if (!this.synth) return false; + // Set user activation flag when toggle is called + this.hasUserActivation = true; + + // Clear permission error on toggle + this.permissionError = false; + this.enabled = !this.enabled; + console.log("TTS toggled:", this.enabled ? "ON" : "OFF"); // Stop any ongoing speech when disabling if (!this.enabled && this.speaking) { this.stop(); } + // Try a test utterance to request permissions + if (this.enabled) { + try { + // Reset any current utterance first + this.synth.cancel(); + this.speakQueue = []; + this.isSpeakingFromQueue = false; + + // Create a silent utterance to trigger permission request + const testUtterance = new SpeechSynthesisUtterance("Hello"); + testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works + testUtterance.rate = 1.0; + + // Handle any errors that might occur + testUtterance.onerror = (event) => { + console.warn("Permission error for TTS:", event); + if (event.error === "not-allowed") { + this.permissionError = true; + this.enabled = false; + alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings."); + } + }; + + // Try to speak the test utterance + this.synth.speak(testUtterance); + } catch (e) { + console.error("Failed to initialize TTS:", e); + } + } + return this.enabled; } @@ -93,11 +156,6 @@ class TTSHandler { */ setSpeed(speed) { this.currentSpeed = Math.max(0.1, Math.min(2.0, speed)); - if (this.utterance && this.speaking) { - // Cannot change speed of active utterance, need to restart - this.stop(); - // Would need to restart the current text, but challenging without storing current text - } } /** @@ -106,6 +164,8 @@ class TTSHandler { * @returns {string} - Processed text */ processTextForSpeech(text) { + if (!text) return ""; + // Remove markdown/formatting that would sound strange when read text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold text = text.replace(/\*([^*]+)\*/g, '$1'); // Italic @@ -118,66 +178,168 @@ class TTSHandler { } /** - * Add natural pauses after sentences using native TTS methods - * @param {SpeechSynthesisUtterance} utterance - The utterance to modify + * Split text into sentences for better speech handling + * @param {string} text - Text to split + * @returns {string[]} - Array of sentences */ - addPauses(utterance) { - // Instead of modifying the text, we'll use the utterance's parameters - // to create natural pauses - these settings generally improve natural speaking - utterance.pitch = 1.0; - utterance.rate = this.currentSpeed; + splitIntoSentences(text) { + if (!text) return []; - // Some TTS engines support these parameters - if ('volume' in utterance) utterance.volume = 1.0; + // Split by sentence terminators, keeping the terminator with the sentence + const sentenceRegex = /[^.!?]+[.!?]+/g; + const sentences = text.match(sentenceRegex) || [text]; + + // If we have very long sentences, break them up by commas too + return sentences.reduce((result, sentence) => { + if (sentence.length > 150 && sentence.includes(',')) { + // Split long sentences at commas + const parts = sentence.split(/,\s*/); + for (let i = 0; i < parts.length - 1; i++) { + result.push(parts[i] + ','); + } + result.push(parts[parts.length - 1]); + return result; + } + result.push(sentence); + return result; + }, []); } /** - * Speak the provided text + * Speak a single utterance with proper configuration + * @param {string} text - Text to speak + * @param {function} onEndCallback - Callback to execute when finished + * @private + */ + speakUtterance(text, onEndCallback) { + if (!text || text.trim() === '') { + if (onEndCallback) onEndCallback(); + this.processSpeakQueue(); + return; + } + + try { + const utterance = new SpeechSynthesisUtterance(text); + + if (this.preferredVoice) { + utterance.voice = this.preferredVoice; + console.log("Using voice:", this.preferredVoice.name); + } + + utterance.rate = this.currentSpeed; + utterance.pitch = 1.0; + utterance.volume = 1.0; + + utterance.onstart = () => { + this.speaking = true; + console.log("TTS started speaking:", text.substring(0, 30) + "..."); + }; + + utterance.onend = () => { + console.log("TTS finished speaking utterance"); + if (onEndCallback) onEndCallback(); + this.processSpeakQueue(); + }; + + utterance.onerror = (event) => { + console.error("Speech synthesis error:", event); + if (event.error === "not-allowed") { + this.permissionError = true; + this.enabled = false; + } + + if (onEndCallback) onEndCallback(); + this.processSpeakQueue(); + }; + + // Actually speak + this.synth.speak(utterance); + + // Workaround for Chrome bug where speech synthesis gets stuck + if (!this.synth.speaking) { + this.synth.pause(); + this.synth.resume(); + } + + } catch (e) { + console.error("Error in speakUtterance:", e); + if (onEndCallback) onEndCallback(); + this.processSpeakQueue(); + } + } + + /** + * Process the next item in the speak queue + * @private + */ + processSpeakQueue() { + if (this.speakQueue.length === 0) { + this.isSpeakingFromQueue = false; + this.speaking = false; + return; + } + + // Skip processing if we're already speaking (prevent overlapping sentences) + if (this.synth.speaking) { + setTimeout(() => this.processSpeakQueue(), 100); + return; + } + + this.isSpeakingFromQueue = true; + const queueItem = this.speakQueue.shift(); + + console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "..."); + + this.speakUtterance(queueItem.text, queueItem.callback); + } + + /** + * Speak the provided text by queueing sentences * @param {string} text - Text to be spoken - * @param {function} onEndCallback - Callback when speech ends + * @param {function} onEndCallback - Callback when all speech ends */ speak(text, onEndCallback = null) { - if (!this.synth || !this.enabled || !text) return; + if (!this.synth || !this.enabled || !text) { + if (onEndCallback) onEndCallback(); + return; + } - // Stop any existing speech - if (this.speaking) { - this.stop(); + // Don't attempt to speak if there's been a permission error + if (this.permissionError) { + console.warn("Not attempting to speak due to permission error"); + if (onEndCallback) onEndCallback(); + return; + } + + // Don't attempt to speak without user activation + if (!this.hasUserActivation) { + console.warn("Not attempting to speak because there hasn't been user interaction yet"); + if (onEndCallback) onEndCallback(); + return; } // Process text for better speech const processedText = this.processTextForSpeech(text); + console.log("TTS attempting to speak:", processedText.substring(0, 50) + "..."); - // Create and configure the utterance - this.utterance = new SpeechSynthesisUtterance(processedText); + // Stop any existing speech + this.stop(); - if (this.preferredVoice) { - this.utterance.voice = this.preferredVoice; + // Split into sentences for better handling + const sentences = this.splitIntoSentences(processedText); + + // Last sentence gets the callback + for (let i = 0; i < sentences.length; i++) { + this.speakQueue.push({ + text: sentences[i], + callback: i === sentences.length - 1 ? onEndCallback : null + }); } - this.utterance.rate = this.currentSpeed; - this.utterance.pitch = 1.0; - - // Apply natural pausing - this.addPauses(this.utterance); - - // Set up event handlers - this.utterance.onstart = () => { - this.speaking = true; - }; - - this.utterance.onend = () => { - this.speaking = false; - if (onEndCallback) onEndCallback(); - }; - - this.utterance.onerror = (event) => { - console.error("Speech synthesis error:", event); - this.speaking = false; - if (onEndCallback) onEndCallback(); - }; - - // Start speaking - this.synth.speak(this.utterance); + // Start processing the queue if not already processing + if (!this.isSpeakingFromQueue) { + this.processSpeakQueue(); + } } /** @@ -210,13 +372,15 @@ class TTSHandler { this.speaking = false; this.paused = false; this.utterance = null; + this.speakQueue = []; + this.isSpeakingFromQueue = false; } /** * Check if TTS is currently active/enabled */ isEnabled() { - return this.enabled; + return this.enabled && !this.permissionError; } /**