Fix TTS handler to handle longer texts by breaking them into sentence queues

2025-04-01 10:53:27 +02:00
parent bf62c63198
commit 113e3b995d
1 changed file with 215 additions and 51 deletions
@@ -13,6 +13,10 @@ class TTSHandler {
    this.preferredVoice = null;
    this.audioCache = new Map(); // Cache for audio segments
    this.currentSpeed = 1.0;
+    this.hasUserActivation = false;
+    this.permissionError = false;
+    this.speakQueue = [];
+    this.isSpeakingFromQueue = false;
    
    // Initialize if speech synthesis is available
    if ('speechSynthesis' in window) {
@@ -27,10 +31,25 @@ class TTSHandler {
      this.synth.onvoiceschanged = () => {
        this.voiceCache = this.synth.getVoices();
        this.selectPreferredVoice();
+        console.log("Voices loaded:", this.voiceCache.length);
      };
      
-      // Enable by default if available
-      this.enabled = true;
+      // Disabled by default until user activates it
+      this.enabled = false;
+      
+      // Set up periodic check to detect and fix stuck speech
+      setInterval(() => {
+        // If we think we're speaking but the browser doesn't, reset state
+        if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
+          console.log("Detected stuck speech state, resetting");
+          this.speaking = false;
+          
+          // Try to continue the queue if there are more items
+          if (this.speakQueue.length > 0) {
+            this.processSpeakQueue();
+          }
+        }
+      }, 1000);
    } else {
      console.warn("Text-to-speech functionality not available in this browser.");
    }
@@ -49,11 +68,15 @@ class TTSHandler {
      "Karen"
    ];
    
+    // Debug: Print all available voices
+    console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", "));
+    
    // Try to find one of our preferred voices
    for (const name of preferredVoiceNames) {
      const voice = this.voiceCache.find(v => v.name === name);
      if (voice) {
        this.preferredVoice = voice;
+        console.log("Selected preferred voice:", name);
        return;
      }
    }
@@ -62,28 +85,68 @@ class TTSHandler {
    const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
    if (englishVoice) {
      this.preferredVoice = englishVoice;
+      console.log("Selected English voice:", englishVoice.name);
      return;
    }
    
    // Last resort: use the first available voice
    if (this.voiceCache.length > 0) {
      this.preferredVoice = this.voiceCache[0];
+      console.log("Selected fallback voice:", this.voiceCache[0].name);
    }
  }
  
  /**
   * Toggle TTS functionality on/off
+   *
+   * Every call (when speech synthesis is available) records that user activation has
+   * happened and clears any previously recorded permission error before flipping
+   * `enabled`. When the new state is "enabled", pending browser speech and the sentence
+   * queue are cleared and a quiet test utterance ("Hello") is spoken; if that utterance
+   * reports a "not-allowed" error, TTS is disabled again and the user is alerted.
+   * NOTE(review): the error handler presumably runs after this method has returned, so
+   * the returned value would not reflect such a late failure - confirm.
+   * @returns {boolean} New state of TTS (enabled/disabled); false when no synthesizer is available
   */
  toggle() {
    if (!this.synth) return false;
    
+    // Set user activation flag when toggle is called (speak() refuses to run without it)
+    this.hasUserActivation = true;
+    
+    // Clear permission error on toggle so a new attempt can be made
+    this.permissionError = false;
+    
    this.enabled = !this.enabled;
+    console.log("TTS toggled:", this.enabled ? "ON" : "OFF");
    
    // Stop any ongoing speech when disabling
    if (!this.enabled && this.speaking) {
      this.stop();
    }
    
+    // Try a test utterance to request permissions
+    if (this.enabled) {
+      try {
+        // Reset any current utterance and queued sentences first
+        this.synth.cancel();
+        this.speakQueue = [];
+        this.isSpeakingFromQueue = false;
+        
+        // Create a near-silent utterance to trigger permission request
+        const testUtterance = new SpeechSynthesisUtterance("Hello");
+        testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
+        testUtterance.rate = 1.0;
+        
+        // Only a "not-allowed" error disables TTS; other errors are just logged
+        testUtterance.onerror = (event) => {
+          console.warn("Permission error for TTS:", event);
+          if (event.error === "not-allowed") {
+            this.permissionError = true;
+            this.enabled = false;
+            alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
+          }
+        };
+        
+        // Try to speak the test utterance
+        this.synth.speak(testUtterance);
+      } catch (e) {
+        console.error("Failed to initialize TTS:", e);
+      }
+    }
+    
    return this.enabled;
  }
  
@@ -93,11 +156,6 @@ class TTSHandler {
   */
  setSpeed(speed) {
    this.currentSpeed = Math.max(0.1, Math.min(2.0, speed)); // clamp to the 0.1 to 2.0 range
-    if (this.utterance && this.speaking) {
-      // Cannot change speed of active utterance, need to restart
-      this.stop();
-      // Would need to restart the current text, but challenging without storing current text
-    }
  }
  
  /**
@@ -106,6 +164,8 @@ class TTSHandler {
   * @returns {string} - Processed text
   */
  processTextForSpeech(text) {
+    if (!text) return "";
+    
    // Remove markdown/formatting that would sound strange when read
    text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
    text = text.replace(/\*([^*]+)\*/g, '$1');     // Italic
@@ -118,66 +178,168 @@ class TTSHandler {
  }
  
  /**
-   * Add natural pauses after sentences using native TTS methods
-   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
+   * Split text into sentence-sized chunks for better speech handling.
+   *
+   * Each chunk is a run of non-terminator characters followed by its terminator(s)
+   * (".", "!" or "?"). A trailing fragment that has no terminator is kept as its own
+   * chunk instead of being dropped. Chunks longer than 150 characters that contain a
+   * comma are further split at commas, with the comma kept on the preceding part.
+   * @param {string} text - Text to split
+   * @returns {string[]} - Array of sentences (empty array for empty/missing text)
   */
-  addPauses(utterance) {
-    // Instead of modifying the text, we'll use the utterance's parameters
-    // to create natural pauses - these settings generally improve natural speaking
-    utterance.pitch = 1.0;
-    utterance.rate = this.currentSpeed;
+  splitIntoSentences(text) {
+    if (!text) return [];
    
-    // Some TTS engines support these parameters
-    if ('volume' in utterance) utterance.volume = 1.0;
+    // Split by sentence terminators, keeping the terminator with the sentence; `|$` keeps an unterminated tail
+    const sentenceRegex = /[^.!?]+(?:[.!?]+|$)/g;
+    const sentences = text.match(sentenceRegex) || [text];
+    
+    // If we have very long sentences, break them up by commas too
+    return sentences.reduce((result, sentence) => {
+      if (sentence.length > 150 && sentence.includes(',')) {
+        // Split long sentences at commas
+        const parts = sentence.split(/,\s*/);
+        for (let i = 0; i < parts.length - 1; i++) {
+          result.push(parts[i] + ',');
+        }
+        result.push(parts[parts.length - 1]);
+        return result;
+      }
+      result.push(sentence);
+      return result;
+    }, []);
  }
  
  /**
-   * Speak the provided text
+   * Speak a single utterance with proper configuration.
+   *
+   * Blank text is not sent to the synthesizer: the callback is invoked and the queue is
+   * advanced immediately. Otherwise an utterance is built with the preferred voice (if
+   * one is selected), the current speed, and pitch/volume of 1.0. On end, on error, and
+   * when building or speaking throws, the callback is invoked and processSpeakQueue()
+   * is called so the next queued item can run. A "not-allowed" error additionally
+   * records a permission error and disables TTS.
+   * NOTE(review): the error handler advances the queue for every error type; if the
+   * browser reports cancellation as an error, a handler from a cancelled utterance could
+   * advance a queue that was filled afterwards - confirm.
+   * @param {string} text - Text to speak
+   * @param {function} onEndCallback - Callback to execute when finished (may be null/undefined)
+   * @private
+   */
+  speakUtterance(text, onEndCallback) {
+    if (!text || text.trim() === '') {
+      if (onEndCallback) onEndCallback();
+      this.processSpeakQueue();
+      return;
+    }
+    
+    try {
+      const utterance = new SpeechSynthesisUtterance(text);
+      
+      if (this.preferredVoice) {
+        utterance.voice = this.preferredVoice;
+        console.log("Using voice:", this.preferredVoice.name);
+      }
+      
+      utterance.rate = this.currentSpeed;
+      utterance.pitch = 1.0;
+      utterance.volume = 1.0;
+      
+      utterance.onstart = () => {
+        this.speaking = true;
+        console.log("TTS started speaking:", text.substring(0, 30) + "...");
+      };
+      
+      utterance.onend = () => {
+        console.log("TTS finished speaking utterance");
+        if (onEndCallback) onEndCallback();
+        this.processSpeakQueue();
+      };
+      
+      utterance.onerror = (event) => {
+        console.error("Speech synthesis error:", event);
+        if (event.error === "not-allowed") {
+          this.permissionError = true;
+          this.enabled = false;
+        }
+        
+        if (onEndCallback) onEndCallback();
+        this.processSpeakQueue();
+      };
+      
+      // Actually speak
+      this.synth.speak(utterance);
+      
+      // Workaround for Chrome bug where speech synthesis gets stuck: nudge with pause/resume if not yet speaking
+      if (!this.synth.speaking) {
+        this.synth.pause();
+        this.synth.resume();
+      }
+      
+    } catch (e) {
+      console.error("Error in speakUtterance:", e);
+      if (onEndCallback) onEndCallback();
+      this.processSpeakQueue();
+    }
+  }
+  
+  /**
+   * Process the next item in the speak queue.
+   *
+   * With an empty queue, both `isSpeakingFromQueue` and `speaking` are cleared and the
+   * method returns. While `this.synth.speaking` is true nothing is dequeued; the method
+   * re-schedules itself with a 100 ms timeout instead. Otherwise the first queued item
+   * is removed and its text and callback are handed to speakUtterance().
+   * @private
+   */
+  processSpeakQueue() {
+    if (this.speakQueue.length === 0) {
+      this.isSpeakingFromQueue = false;
+      this.speaking = false;
+      return;
+    }
+    
+    // Skip processing if we're already speaking (prevent overlapping sentences); retry in 100 ms
+    if (this.synth.speaking) {
+      setTimeout(() => this.processSpeakQueue(), 100);
+      return;
+    }
+    
+    this.isSpeakingFromQueue = true;
+    const queueItem = this.speakQueue.shift();
+    
+    console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "...");
+    
+    this.speakUtterance(queueItem.text, queueItem.callback);
+  }
+  
+  /**
+   * Speak the provided text by queueing sentences
+   *
+   * The callback is invoked on every exit path: immediately when nothing will be spoken
+   * (no synthesizer, TTS disabled, empty text, a recorded permission error, no user
+   * activation yet, or no sentences left after processing), otherwise via the last
+   * queued sentence. Any speech already in progress is stopped first.
   * @param {string} text - Text to be spoken
-   * @param {function} onEndCallback - Callback when speech ends
+   * @param {function} onEndCallback - Callback when all speech ends
   */
  speak(text, onEndCallback = null) {
-    if (!this.synth || !this.enabled || !text) return;
+    if (!this.synth || !this.enabled || !text) {
+      if (onEndCallback) onEndCallback();
+      return;
+    }
    
-    // Stop any existing speech
-    if (this.speaking) {
-      this.stop();
+    // Don't attempt to speak if there's been a permission error
+    if (this.permissionError) {
+      console.warn("Not attempting to speak due to permission error");
+      if (onEndCallback) onEndCallback();
+      return;
+    }
+    
+    // Don't attempt to speak without user activation
+    if (!this.hasUserActivation) {
+      console.warn("Not attempting to speak because there hasn't been user interaction yet");
+      if (onEndCallback) onEndCallback();
+      return;
    }
    
    // Process text for better speech
    const processedText = this.processTextForSpeech(text);
+    console.log("TTS attempting to speak:", processedText.substring(0, 50) + "...");
    
-    // Create and configure the utterance
-    this.utterance = new SpeechSynthesisUtterance(processedText);
+    // Stop any existing speech
+    this.stop();
    
-    if (this.preferredVoice) {
-      this.utterance.voice = this.preferredVoice;
+    // Split into sentences for better handling
+    const sentences = this.splitIntoSentences(processedText);
+    
+    // Nothing left to say after processing: still honor the completion callback
+    if (sentences.length === 0) {
+      if (onEndCallback) onEndCallback();
+      return;
+    }
+    
+    // Last sentence gets the callback
+    for (let i = 0; i < sentences.length; i++) {
+      this.speakQueue.push({
+        text: sentences[i],
+        callback: i === sentences.length - 1 ? onEndCallback : null
+      });
    }
    
-    this.utterance.rate = this.currentSpeed;
-    this.utterance.pitch = 1.0;
-    
-    // Apply natural pausing
-    this.addPauses(this.utterance);
-    
-    // Set up event handlers
-    this.utterance.onstart = () => {
-      this.speaking = true;
-    };
-    
-    this.utterance.onend = () => {
-      this.speaking = false;
-      if (onEndCallback) onEndCallback();
-    };
-    
-    this.utterance.onerror = (event) => {
-      console.error("Speech synthesis error:", event);
-      this.speaking = false;
-      if (onEndCallback) onEndCallback();
-    };
-    
-    // Start speaking
-    this.synth.speak(this.utterance);
+    // Start processing the queue if not already processing
+    if (!this.isSpeakingFromQueue) {
+      this.processSpeakQueue();
+    }
  }
  
  /**
@@ -210,13 +372,15 @@ class TTSHandler {
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
+    this.speakQueue = [];
+    this.isSpeakingFromQueue = false;
  }
  
  /**
   * Check if TTS is currently active/enabled
+   * @returns {boolean} true only when TTS is enabled and no permission error has been recorded
   */
  isEnabled() {
-    return this.enabled;
+    return this.enabled && !this.permissionError;
  }
  
  /**