From 113e3b995d0135eae00e64655c4ef52413afc466 Mon Sep 17 00:00:00 2001
From: Georg Tomitsch <georg@tomitsch.net>
Date: Tue, 1 Apr 2025 10:53:27 +0200
Subject: [PATCH] Fix TTS handler to handle longer texts by breaking them into
 sentence queues

---
 public/js/tts-handler.js | 266 +++++++++++++++++++++++++++++++--------
 1 file changed, 215 insertions(+), 51 deletions(-)

diff --git a/public/js/tts-handler.js b/public/js/tts-handler.js
index e3eedf7..70e3f9a 100644
--- a/public/js/tts-handler.js
+++ b/public/js/tts-handler.js
@@ -13,6 +13,10 @@ class TTSHandler {
     this.preferredVoice = null;
     this.audioCache = new Map(); // Cache for audio segments
     this.currentSpeed = 1.0;
+    this.hasUserActivation = false;
+    this.permissionError = false;
+    this.speakQueue = [];
+    this.isSpeakingFromQueue = false;
     
     // Initialize if speech synthesis is available
     if ('speechSynthesis' in window) {
@@ -27,10 +31,25 @@ class TTSHandler {
       this.synth.onvoiceschanged = () => {
         this.voiceCache = this.synth.getVoices();
         this.selectPreferredVoice();
+        console.log("Voices loaded:", this.voiceCache.length);
       };
       
-      // Enable by default if available
-      this.enabled = true;
+      // Disabled by default until user activates it
+      this.enabled = false;
+      
+      // Set up periodic check to detect and fix stuck speech
+      setInterval(() => {
+        // If we think we're speaking but the browser doesn't, reset state
+        if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
+          console.log("Detected stuck speech state, resetting");
+          this.speaking = false;
+          
+          // Try to continue the queue if there are more items
+          if (this.speakQueue.length > 0) {
+            this.processSpeakQueue();
+          }
+        }
+      }, 1000);
     } else {
       console.warn("Text-to-speech functionality not available in this browser.");
     }
@@ -49,11 +68,15 @@ class TTSHandler {
       "Karen"
     ];
     
+    // Debug: Print all available voices
+    console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", "));
+    
     // Try to find one of our preferred voices
     for (const name of preferredVoiceNames) {
       const voice = this.voiceCache.find(v => v.name === name);
       if (voice) {
         this.preferredVoice = voice;
+        console.log("Selected preferred voice:", name);
         return;
       }
     }
@@ -62,28 +85,68 @@ class TTSHandler {
     const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
     if (englishVoice) {
       this.preferredVoice = englishVoice;
+      console.log("Selected English voice:", englishVoice.name);
       return;
     }
     
     // Last resort: use the first available voice
     if (this.voiceCache.length > 0) {
       this.preferredVoice = this.voiceCache[0];
+      console.log("Selected fallback voice:", this.voiceCache[0].name);
     }
   }
   
   /**
    * Toggle TTS functionality on/off
+   * @returns {boolean} New state of TTS (enabled/disabled)
    */
   toggle() {
     if (!this.synth) return false;
     
+    // Set user activation flag when toggle is called
+    this.hasUserActivation = true;
+    
+    // Clear permission error on toggle
+    this.permissionError = false;
+    
     this.enabled = !this.enabled;
+    console.log("TTS toggled:", this.enabled ? "ON" : "OFF");
     
     // Stop any ongoing speech when disabling
     if (!this.enabled && this.speaking) {
       this.stop();
     }
     
+    // Try a test utterance to request permissions
+    if (this.enabled) {
+      try {
+        // Reset any current utterance first
+        this.synth.cancel();
+        this.speakQueue = [];
+        this.isSpeakingFromQueue = false;
+        
+        // Speak a short, near-silent test utterance to trigger the permission request
+        const testUtterance = new SpeechSynthesisUtterance("Hello");
+        testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
+        testUtterance.rate = 1.0;
+        
+        // Handle any errors that might occur
+        testUtterance.onerror = (event) => {
+          console.warn("Permission error for TTS:", event);
+          if (event.error === "not-allowed") {
+            this.permissionError = true;
+            this.enabled = false;
+            alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
+          }
+        };
+        
+        // Try to speak the test utterance
+        this.synth.speak(testUtterance);
+      } catch (e) {
+        console.error("Failed to initialize TTS:", e);
+      }
+    }
+    
     return this.enabled;
   }
   
@@ -93,11 +156,6 @@ class TTSHandler {
    */
   setSpeed(speed) {
     this.currentSpeed = Math.max(0.1, Math.min(2.0, speed));
-    if (this.utterance && this.speaking) {
-      // Cannot change speed of active utterance, need to restart
-      this.stop();
-      // Would need to restart the current text, but challenging without storing current text
-    }
   }
   
   /**
@@ -106,6 +164,8 @@ class TTSHandler {
    * @returns {string} - Processed text
    */
   processTextForSpeech(text) {
+    if (!text) return "";
+    
     // Remove markdown/formatting that would sound strange when read
     text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
     text = text.replace(/\*([^*]+)\*/g, '$1');     // Italic
@@ -118,66 +178,168 @@ class TTSHandler {
   }
   
   /**
-   * Add natural pauses after sentences using native TTS methods
-   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
+   * Split text into speakable chunks: sentences, with long ones split at commas
+   * @param {string} text - Text to split
+   * @returns {string[]} - Chunks in order; unterminated trailing text is kept
    */
-  addPauses(utterance) {
-    // Instead of modifying the text, we'll use the utterance's parameters
-    // to create natural pauses - these settings generally improve natural speaking
-    utterance.pitch = 1.0;
-    utterance.rate = this.currentSpeed;
+  splitIntoSentences(text) {
+    if (!text) return [];
     
-    // Some TTS engines support these parameters
-    if ('volume' in utterance) utterance.volume = 1.0;
+    // Match each sentence with its terminator(s), plus any unterminated tail
+    const sentenceRegex = /[^.!?]+(?:[.!?]+|$)/g;
+    const sentences = text.match(sentenceRegex) || [text];
+    
+    // If we have very long sentences, break them up by commas too
+    return sentences.reduce((result, sentence) => {
+      if (sentence.length > 150 && sentence.includes(',')) {
+        // Split long sentences at commas
+        const parts = sentence.split(/,\s*/);
+        for (let i = 0; i < parts.length - 1; i++) {
+          result.push(parts[i] + ',');
+        }
+        result.push(parts[parts.length - 1]);
+        return result;
+      }
+      result.push(sentence);
+      return result;
+    }, []);
   }
   
   /**
-   * Speak the provided text
+   * Speak a single utterance with proper configuration
+   * @param {string} text - Text to speak
+   * @param {function} onEndCallback - Callback to execute when finished
+   * @private
+   */
+  speakUtterance(text, onEndCallback) {
+    if (!text || text.trim() === '') {
+      if (onEndCallback) onEndCallback();
+      this.processSpeakQueue();
+      return;
+    }
+    
+    try {
+      const utterance = new SpeechSynthesisUtterance(text);
+      
+      if (this.preferredVoice) {
+        utterance.voice = this.preferredVoice;
+        console.log("Using voice:", this.preferredVoice.name);
+      }
+      
+      utterance.rate = this.currentSpeed;
+      utterance.pitch = 1.0;
+      utterance.volume = 1.0;
+      
+      utterance.onstart = () => {
+        this.speaking = true;
+        console.log("TTS started speaking:", text.substring(0, 30) + "...");
+      };
+      
+      utterance.onend = () => {
+        console.log("TTS finished speaking utterance");
+        if (onEndCallback) onEndCallback();
+        this.processSpeakQueue();
+      };
+      
+      utterance.onerror = (event) => {
+        console.error("Speech synthesis error:", event);
+        if (event.error === "not-allowed") {
+          this.permissionError = true;
+          this.enabled = false;
+        }
+        
+        if (onEndCallback) onEndCallback();
+        this.processSpeakQueue();
+      };
+      
+      // Actually speak
+      this.synth.speak(utterance);
+      
+      // Workaround for Chrome bug where speech synthesis gets stuck
+      if (!this.synth.speaking) {
+        this.synth.pause();
+        this.synth.resume();
+      }
+      
+    } catch (e) {
+      console.error("Error in speakUtterance:", e);
+      if (onEndCallback) onEndCallback();
+      this.processSpeakQueue();
+    }
+  }
+  
+  /**
+   * Process the next item in the speak queue
+   * @private
+   */
+  processSpeakQueue() {
+    if (this.speakQueue.length === 0) {
+      this.isSpeakingFromQueue = false;
+      this.speaking = false;
+      return;
+    }
+    
+    // If the browser is still speaking, retry in 100 ms (prevents overlapping sentences)
+    if (this.synth.speaking) {
+      setTimeout(() => this.processSpeakQueue(), 100);
+      return;
+    }
+    
+    this.isSpeakingFromQueue = true;
+    const queueItem = this.speakQueue.shift();
+    
+    console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "...");
+    
+    this.speakUtterance(queueItem.text, queueItem.callback);
+  }
+  
+  /**
+   * Speak the provided text by queueing sentences
    * @param {string} text - Text to be spoken
-   * @param {function} onEndCallback - Callback when speech ends
+   * @param {function} onEndCallback - Callback when all speech ends
    */
   speak(text, onEndCallback = null) {
-    if (!this.synth || !this.enabled || !text) return;
+    if (!this.synth || !this.enabled || !text) {
+      if (onEndCallback) onEndCallback();
+      return;
+    }
     
-    // Stop any existing speech
-    if (this.speaking) {
-      this.stop();
+    // Don't attempt to speak if there's been a permission error
+    if (this.permissionError) {
+      console.warn("Not attempting to speak due to permission error");
+      if (onEndCallback) onEndCallback();
+      return;
+    }
+    
+    // Don't attempt to speak without user activation
+    if (!this.hasUserActivation) {
+      console.warn("Not attempting to speak because there hasn't been user interaction yet");
+      if (onEndCallback) onEndCallback();
+      return;
     }
     
     // Process text for better speech
     const processedText = this.processTextForSpeech(text);
+    console.log("TTS attempting to speak:", processedText.substring(0, 50) + "...");
     
-    // Create and configure the utterance
-    this.utterance = new SpeechSynthesisUtterance(processedText);
+    // Stop any existing speech
+    this.stop();
     
-    if (this.preferredVoice) {
-      this.utterance.voice = this.preferredVoice;
+    // Split into sentences for better handling
+    const sentences = this.splitIntoSentences(processedText);
+    
+    // Last sentence gets the callback
+    for (let i = 0; i < sentences.length; i++) {
+      this.speakQueue.push({
+        text: sentences[i],
+        callback: i === sentences.length - 1 ? onEndCallback : null
+      });
     }
     
-    this.utterance.rate = this.currentSpeed;
-    this.utterance.pitch = 1.0;
-    
-    // Apply natural pausing
-    this.addPauses(this.utterance);
-    
-    // Set up event handlers
-    this.utterance.onstart = () => {
-      this.speaking = true;
-    };
-    
-    this.utterance.onend = () => {
-      this.speaking = false;
-      if (onEndCallback) onEndCallback();
-    };
-    
-    this.utterance.onerror = (event) => {
-      console.error("Speech synthesis error:", event);
-      this.speaking = false;
-      if (onEndCallback) onEndCallback();
-    };
-    
-    // Start speaking
-    this.synth.speak(this.utterance);
+    // Start processing the queue if not already processing
+    if (!this.isSpeakingFromQueue) {
+      this.processSpeakQueue();
+    }
   }
   
   /**
@@ -210,13 +372,15 @@ class TTSHandler {
     this.speaking = false;
     this.paused = false;
     this.utterance = null;
+    this.speakQueue = [];
+    this.isSpeakingFromQueue = false;
   }
   
   /**
    * Check if TTS is currently active/enabled
    */
   isEnabled() {
-    return this.enabled;
+    return this.enabled && !this.permissionError;
   }
   
   /**