/**
 * Text-to-Speech Handler for AI Interactive Fiction
 * Wraps the Web Speech API with voice selection and playback controls.
 *
 * Source: public/js/tts-handler.js as of commit bf62c63
 * ("Fix TTS handler to prevent SSML tags from being spoken as text").
 */

class TTSHandler {
  /**
   * Set up state and, when the Web Speech API is present, bind to it,
   * load the voice list and enable TTS by default.
   */
  constructor() {
    this.enabled = false;
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.voiceCache = [];
    this.preferredVoice = null;
    // Reserved for cached audio segments; nothing in this class reads or writes it yet.
    this.audioCache = new Map();
    this.currentSpeed = 1.0;
    // Raw text and callback of the utterance in flight, kept so setSpeed() can restart it.
    this.currentText = null;
    this.currentCallback = null;

    // `typeof` guard: a bare `window` reference throws where no window global exists
    // (Node, unit tests, server-side rendering).
    if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
      this.synth = window.speechSynthesis;

      const refreshVoices = () => {
        this.voiceCache = this.synth.getVoices();
        this.selectPreferredVoice();
      };

      // Some browsers populate the list synchronously, others only after `voiceschanged`.
      refreshVoices();
      this.synth.onvoiceschanged = refreshVoices;

      // Enable by default if available
      this.enabled = true;
    } else {
      console.warn("Text-to-speech functionality not available in this browser.");
    }
  }

  /**
   * Pick `preferredVoice` from `voiceCache`: first a voice from the preference list,
   * then any voice whose language tag starts with "en", then the first voice available.
   * Leaves `preferredVoice` untouched when the cache is empty.
   */
  selectPreferredVoice() {
    // Ordered by preference
    const preferredVoiceNames = [
      "Google UK English Female",
      "Microsoft Hazel Desktop",
      "Microsoft Susan",
      "Daniel",
      "Karen"
    ];

    for (const name of preferredVoiceNames) {
      const voice = this.voiceCache.find((v) => v.name === name);
      if (voice) {
        this.preferredVoice = voice;
        return;
      }
    }

    // Guard the type so a voice without a usable `lang` cannot throw here.
    const englishVoice = this.voiceCache.find(
      (v) => typeof v.lang === 'string' && v.lang.startsWith('en')
    );
    if (englishVoice) {
      this.preferredVoice = englishVoice;
      return;
    }

    if (this.voiceCache.length > 0) {
      this.preferredVoice = this.voiceCache[0];
    }
  }

  /**
   * Toggle TTS functionality on/off. Disabling stops any speech in progress.
   * @returns {boolean} The new enabled state (always false without speech synthesis).
   */
  toggle() {
    if (!this.synth) return false;

    this.enabled = !this.enabled;

    if (!this.enabled && this.speaking) {
      this.stop();
    }

    return this.enabled;
  }

  /**
   * Set the speech rate/speed. Values are clamped to 0.1..2.0; non-numeric input is ignored.
   * A rate cannot be changed on an utterance that is already playing, so active speech
   * is restarted from the beginning at the new rate. Paused speech is stopped instead,
   * so that adjusting the speed never starts playback unexpectedly.
   * @param {number} speed - Speed multiplier (0.1 to 2.0)
   */
  setSpeed(speed) {
    const requested = Number(speed);
    if (!Number.isFinite(requested)) return;

    this.currentSpeed = Math.max(0.1, Math.min(2.0, requested));

    if (!this.utterance || !this.speaking) return;

    if (this.paused) {
      // Handlers stay attached, so the caller's callback still fires for the cancelled speech.
      this.stop();
      return;
    }

    const text = this.currentText;
    const callback = this.currentCallback;

    // Detach the handlers first: the restart hands the callback to the new utterance,
    // and the cancel event of the old one must not invoke it a second time.
    this.utterance.onstart = null;
    this.utterance.onend = null;
    this.utterance.onerror = null;

    this.stop();
    this.speak(text, callback);
  }

  /**
   * Strip markup that would otherwise be read aloud: markdown bold/italic markers,
   * markdown links (the label is kept) and any `<...>` tag such as HTML or SSML.
   * @param {string} text - Text to process
   * @returns {string} - Processed text
   */
  processTextForSpeech(text) {
    let cleaned = text === null || text === undefined ? '' : String(text);

    // Bold must be handled before italic, otherwise `**x**` would be left as `*x*`.
    cleaned = cleaned.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
    cleaned = cleaned.replace(/\*([^*]+)\*/g, '$1'); // Italic
    cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // Links

    // Remove HTML/SSML tags so they are not spoken as text
    cleaned = cleaned.replace(/<[^>]+>/g, '');

    return cleaned;
  }

  /**
   * Apply the handler's voice parameters to an utterance: pitch 1.0, the current
   * speed as rate, and full volume where the property exists.
   * Despite the name, this does not insert pauses or alter the utterance text.
   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
   */
  addPauses(utterance) {
    utterance.pitch = 1.0;
    utterance.rate = this.currentSpeed;

    if ('volume' in utterance) utterance.volume = 1.0;
  }

  /**
   * Speak the provided text, replacing any speech that is active or still pending.
   * Does nothing when TTS is unavailable, disabled, or `text` is empty.
   * @param {string} text - Text to be spoken
   * @param {function} onEndCallback - Called once when this utterance ends, fails or is cancelled
   */
  speak(text, onEndCallback = null) {
    if (!this.synth || !this.enabled || !text) return;

    // Also check the engine itself: an utterance can be queued before `onstart` fires.
    if (this.speaking || this.synth.speaking || this.synth.pending) {
      this.stop();
    }

    const utterance = new SpeechSynthesisUtterance(this.processTextForSpeech(text));

    if (this.preferredVoice) {
      utterance.voice = this.preferredVoice;
    }

    this.addPauses(utterance);

    // Events of a cancelled utterance arrive asynchronously, possibly after its
    // replacement has started; only the current utterance may change handler state.
    const isCurrent = () => this.utterance === utterance;
    const finish = () => {
      if (isCurrent()) {
        this.speaking = false;
        this.paused = false;
      }
      if (typeof onEndCallback === 'function') onEndCallback();
    };

    utterance.onstart = () => {
      if (isCurrent()) this.speaking = true;
    };

    utterance.onend = finish;

    utterance.onerror = (event) => {
      const reason = event && event.error;
      // Cancelling speech reports "interrupted"/"canceled"; that is expected, not a failure.
      if (reason !== 'interrupted' && reason !== 'canceled') {
        console.error("Speech synthesis error:", event);
      }
      finish();
    };

    this.utterance = utterance;
    this.currentText = text;
    this.currentCallback = onEndCallback;
    // Mark as speaking right away so stop()/toggle()/speak() see the pending utterance.
    this.speaking = true;

    try {
      this.synth.speak(utterance);
    } catch (error) {
      console.error("Speech synthesis error:", error);
      finish();
    }
  }

  /**
   * Pause the current speech
   */
  pause() {
    if (!this.synth || !this.speaking) return;

    this.synth.pause();
    this.paused = true;
  }

  /**
   * Resume paused speech
   */
  resume() {
    if (!this.synth || !this.paused) return;

    this.synth.resume();
    this.paused = false;
  }

  /**
   * Stop the current speech, drop anything still queued in the engine and reset state.
   */
  stop() {
    if (!this.synth) return;

    this.synth.cancel();
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.currentText = null;
    this.currentCallback = null;
  }

  /**
   * Check if TTS is currently active/enabled
   * @returns {boolean}
   */
  isEnabled() {
    return this.enabled;
  }

  /**
   * Check if speech is currently in progress (including an utterance that is queued or paused)
   * @returns {boolean}
   */
  isSpeaking() {
    return this.speaking;
  }
}

// Create a global instance
// NOTE(review): the previous version exposed this as `tts` and offered `isReady()`;
// confirm that every caller has been moved to `ttsHandler` / `isEnabled()`.
const ttsHandler = new TTSHandler();