Fix TTS handler to handle longer texts by breaking them into sentence queues

2025-04-01 10:53:27 +02:00
parent bf62c63198
commit 113e3b995d
1 changed file with 215 additions and 51 deletions
@@ -13,6 +13,10 @@ class TTSHandler {
    this.preferredVoice = null;
    this.audioCache = new Map(); // Cache for audio segments
    this.currentSpeed = 1.0;
    this.hasUserActivation = false;
    this.permissionError = false;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;
    // Initialize if speech synthesis is available
    if ('speechSynthesis' in window) {
@@ -27,10 +31,25 @@ class TTSHandler {
      this.synth.onvoiceschanged = () => {
        this.voiceCache = this.synth.getVoices();
        this.selectPreferredVoice();
        console.log("Voices loaded:", this.voiceCache.length);
      };
-      // Enable by default if available
+      // Disabled by default until user activates it
-      this.enabled = true;
+      this.enabled = false;
      // Set up periodic check to detect and fix stuck speech
      setInterval(() => {
        // If we think we're speaking but the browser doesn't, reset state
        if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
          console.log("Detected stuck speech state, resetting");
          this.speaking = false;
          // Try to continue the queue if there are more items
          if (this.speakQueue.length > 0) {
            this.processSpeakQueue();
          }
        }
      }, 1000);
    } else {
      console.warn("Text-to-speech functionality not available in this browser.");
    }
@@ -49,11 +68,15 @@ class TTSHandler {
      "Karen"
    ];
    // Debug: Print all available voices
    console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", "));
    // Try to find one of our preferred voices
    for (const name of preferredVoiceNames) {
      const voice = this.voiceCache.find(v => v.name === name);
      if (voice) {
        this.preferredVoice = voice;
        console.log("Selected preferred voice:", name);
        return;
      }
    }
@@ -62,28 +85,68 @@ class TTSHandler {
    const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
    if (englishVoice) {
      this.preferredVoice = englishVoice;
      console.log("Selected English voice:", englishVoice.name);
      return;
    }
    // Last resort: use the first available voice
    if (this.voiceCache.length > 0) {
      this.preferredVoice = this.voiceCache[0];
      console.log("Selected fallback voice:", this.voiceCache[0].name);
    }
  }
  /**
   * Toggle TTS functionality on/off
   * @returns {boolean} New state of TTS (enabled/disabled)
   */
  toggle() {
    if (!this.synth) return false;
    // Set user activation flag when toggle is called
    this.hasUserActivation = true;
    // Clear permission error on toggle
    this.permissionError = false;
    this.enabled = !this.enabled;
    console.log("TTS toggled:", this.enabled ? "ON" : "OFF");
    // Stop any ongoing speech when disabling
    if (!this.enabled && this.speaking) {
      this.stop();
    }
    // Try a test utterance to request permissions
    if (this.enabled) {
      try {
        // Reset any current utterance first
        this.synth.cancel();
        this.speakQueue = [];
        this.isSpeakingFromQueue = false;
        // Create a silent utterance to trigger permission request
        const testUtterance = new SpeechSynthesisUtterance("Hello");
        testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
        testUtterance.rate = 1.0;
        // Handle any errors that might occur
        testUtterance.onerror = (event) => {
          console.warn("Permission error for TTS:", event);
          if (event.error === "not-allowed") {
            this.permissionError = true;
            this.enabled = false;
            alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
          }
        };
        // Try to speak the test utterance
        this.synth.speak(testUtterance);
      } catch (e) {
        console.error("Failed to initialize TTS:", e);
      }
    }
    return this.enabled;
  }
@@ -93,11 +156,6 @@ class TTSHandler {
   */
  setSpeed(speed) {
    this.currentSpeed = Math.max(0.1, Math.min(2.0, speed));
    if (this.utterance && this.speaking) {
      // Cannot change speed of active utterance, need to restart
      this.stop();
      // Would need to restart the current text, but challenging without storing current text
    }
  }
  /**
@@ -106,6 +164,8 @@ class TTSHandler {
   * @returns {string} - Processed text
   */
  processTextForSpeech(text) {
    if (!text) return "";
    // Remove markdown/formatting that would sound strange when read
    text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
    text = text.replace(/\*([^*]+)\*/g, '$1');     // Italic
@@ -118,66 +178,168 @@ class TTSHandler {
  }
  /**
-   * Add natural pauses after sentences using native TTS methods
+   * Split text into sentences for better speech handling
-   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
+   * @param {string} text - Text to split
   * @returns {string[]} - Array of sentences
   */
-  addPauses(utterance) {
+  splitIntoSentences(text) {
-    // Instead of modifying the text, we'll use the utterance's parameters
+    if (!text) return [];
    // to create natural pauses - these settings generally improve natural speaking
    utterance.pitch = 1.0;
    utterance.rate = this.currentSpeed;
-    // Some TTS engines support these parameters
+    // Split by sentence terminators, keeping the terminator with the sentence
-    if ('volume' in utterance) utterance.volume = 1.0;
+    const sentenceRegex = /[^.!?]+[.!?]+/g;
    const sentences = text.match(sentenceRegex) || [text];
    // If we have very long sentences, break them up by commas too
    return sentences.reduce((result, sentence) => {
      if (sentence.length > 150 && sentence.includes(',')) {
        // Split long sentences at commas
        const parts = sentence.split(/,\s*/);
        for (let i = 0; i < parts.length - 1; i++) {
          result.push(parts[i] + ',');
        }
        result.push(parts[parts.length - 1]);
        return result;
      }
      result.push(sentence);
      return result;
    }, []);
  }
  /**
-   * Speak the provided text
+   * Speak a single utterance with proper configuration
   * @param {string} text - Text to speak
   * @param {function} onEndCallback - Callback to execute when finished
   * @private
   */
  speakUtterance(text, onEndCallback) {
    if (!text || text.trim() === '') {
      if (onEndCallback) onEndCallback();
      this.processSpeakQueue();
      return;
    }
    try {
      const utterance = new SpeechSynthesisUtterance(text);
      if (this.preferredVoice) {
        utterance.voice = this.preferredVoice;
        console.log("Using voice:", this.preferredVoice.name);
      }
      utterance.rate = this.currentSpeed;
      utterance.pitch = 1.0;
      utterance.volume = 1.0;
      utterance.onstart = () => {
        this.speaking = true;
        console.log("TTS started speaking:", text.substring(0, 30) + "...");
      };
      utterance.onend = () => {
        console.log("TTS finished speaking utterance");
        if (onEndCallback) onEndCallback();
        this.processSpeakQueue();
      };
      utterance.onerror = (event) => {
        console.error("Speech synthesis error:", event);
        if (event.error === "not-allowed") {
          this.permissionError = true;
          this.enabled = false;
        }
        if (onEndCallback) onEndCallback();
        this.processSpeakQueue();
      };
      // Actually speak
      this.synth.speak(utterance);
      // Workaround for Chrome bug where speech synthesis gets stuck
      if (!this.synth.speaking) {
        this.synth.pause();
        this.synth.resume();
      }
    } catch (e) {
      console.error("Error in speakUtterance:", e);
      if (onEndCallback) onEndCallback();
      this.processSpeakQueue();
    }
  }
  /**
   * Process the next item in the speak queue
   * @private
   */
  processSpeakQueue() {
    if (this.speakQueue.length === 0) {
      this.isSpeakingFromQueue = false;
      this.speaking = false;
      return;
    }
    // Skip processing if we're already speaking (prevent overlapping sentences)
    if (this.synth.speaking) {
      setTimeout(() => this.processSpeakQueue(), 100);
      return;
    }
    this.isSpeakingFromQueue = true;
    const queueItem = this.speakQueue.shift();
    console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "...");
    this.speakUtterance(queueItem.text, queueItem.callback);
  }
  /**
   * Speak the provided text by queueing sentences
   * @param {string} text - Text to be spoken
-   * @param {function} onEndCallback - Callback when speech ends
+   * @param {function} onEndCallback - Callback when all speech ends
   */
  speak(text, onEndCallback = null) {
-    if (!this.synth || !this.enabled || !text) return;
+    if (!this.synth || !this.enabled || !text) {
      if (onEndCallback) onEndCallback();
      return;
    }
-    // Stop any existing speech
+    // Don't attempt to speak if there's been a permission error
-    if (this.speaking) {
+    if (this.permissionError) {
-      this.stop();
+      console.warn("Not attempting to speak due to permission error");
      if (onEndCallback) onEndCallback();
      return;
    }
    // Don't attempt to speak without user activation
    if (!this.hasUserActivation) {
      console.warn("Not attempting to speak because there hasn't been user interaction yet");
      if (onEndCallback) onEndCallback();
      return;
    }
    // Process text for better speech
    const processedText = this.processTextForSpeech(text);
    console.log("TTS attempting to speak:", processedText.substring(0, 50) + "...");
-    // Create and configure the utterance
+    // Stop any existing speech
-    this.utterance = new SpeechSynthesisUtterance(processedText);
+    this.stop();
-    if (this.preferredVoice) {
+    // Split into sentences for better handling
-      this.utterance.voice = this.preferredVoice;
+    const sentences = this.splitIntoSentences(processedText);
    // Last sentence gets the callback
    for (let i = 0; i < sentences.length; i++) {
      this.speakQueue.push({
        text: sentences[i],
        callback: i === sentences.length - 1 ? onEndCallback : null
      });
    }
-    this.utterance.rate = this.currentSpeed;
+    // Start processing the queue if not already processing
-    this.utterance.pitch = 1.0;
+    if (!this.isSpeakingFromQueue) {
-    
+      this.processSpeakQueue();
-    // Apply natural pausing
+    }
    this.addPauses(this.utterance);
    // Set up event handlers
    this.utterance.onstart = () => {
      this.speaking = true;
    };
    this.utterance.onend = () => {
      this.speaking = false;
      if (onEndCallback) onEndCallback();
    };
    this.utterance.onerror = (event) => {
      console.error("Speech synthesis error:", event);
      this.speaking = false;
      if (onEndCallback) onEndCallback();
    };
    // Start speaking
    this.synth.speak(this.utterance);
  }
  /**
@@ -210,13 +372,15 @@ class TTSHandler {
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;
  }
  /**
   * Check if TTS is currently active/enabled
   */
  isEnabled() {
-    return this.enabled;
+    return this.enabled && !this.permissionError;
  }
  /**