Fix TTS handler to prevent SSML tags from being spoken as text

2025-04-01 10:32:17 +02:00
parent b6472aa275
commit bf62c63198
1 changed files with 189 additions and 64 deletions
@@ -1,106 +1,231 @@
 /**
 * Text-to-Speech Handler for AI Interactive Fiction
- * Uses Web Speech API for text-to-speech
+ * Built on the Web Speech API; adds preferred-voice selection, voice-list caching, speed control, and pause/resume/stop
 */
+
 class TTSHandler {
  constructor() {
    this.enabled = false;
    this.speaking = false;
-    this.queue = [];
-    this.synthesis = window.speechSynthesis;
+    this.paused = false;
    this.utterance = null;
+    this.voiceCache = [];
+    this.preferredVoice = null;
+    this.audioCache = new Map(); // Audio segment cache; no method of this class reads or writes it
+    this.currentSpeed = 1.0;
    
-    // Check if browser supports speech synthesis
-    if (this.synthesis) {
-      console.log('Speech synthesis is supported in this browser');
-      this.browserSupport = true;
+    // Without speechSynthesis, this.synth stays undefined and the methods that check it return early
+    if ('speechSynthesis' in window) {
+      this.synth = window.speechSynthesis;
+      
+      // Use the voice list right away if it is already populated; otherwise the handler below fills it
+      if (this.synth.getVoices().length > 0) {
+        this.voiceCache = this.synth.getVoices();
+        this.selectPreferredVoice();
+      }
+      
+      this.synth.onvoiceschanged = () => {
+        this.voiceCache = this.synth.getVoices();
+        this.selectPreferredVoice();
+      };
+      
+      // TTS starts switched on whenever the API exists
+      this.enabled = true;
    } else {
-      console.warn('Speech synthesis is not supported in this browser');
-      this.browserSupport = false;
+      console.warn("Text-to-speech functionality not available in this browser.");
    }
  }
  
  /**
-   * Toggle TTS on/off
+   * Select the preferred voice based on language and quality
+   */
+  selectPreferredVoice() {
+    // Work on the voice list cached by the constructor / voiceschanged handler.
+    const voices = this.voiceCache;
+    
+    // Voice names we like best, most preferred first.
+    const rankedNames = [
+      "Google UK English Female",
+      "Microsoft Hazel Desktop",
+      "Microsoft Susan",
+      "Daniel",
+      "Karen"
+    ];
+    
+    // Tier 1: the highest-ranked name that has an exact match in the cache.
+    let chosen;
+    for (const wanted of rankedNames) {
+      chosen = voices.find((voice) => voice.name === wanted);
+      if (chosen) break;
+    }
+    
+    // Tier 2: the first voice whose language tag starts with "en".
+    // Tier 3: the first cached voice, whatever its language.
+    // Each tier is evaluated only when the one before it found nothing.
+    chosen = chosen || voices.find((voice) => voice.lang.startsWith('en')) || voices[0];
+    
+    // With an empty cache nothing is chosen and preferredVoice keeps its
+    // previous value.
+    if (chosen) {
+      this.preferredVoice = chosen;
+    }
+  }
+  
+  /**
+   * Flip TTS on/off. Returns the new enabled state, or false (state unchanged) when speech synthesis is unavailable
   */
  toggle() {
+    if (!this.synth) return false;
+    
    this.enabled = !this.enabled;
+    
+    // Disabling cuts off speech that has already started; `speaking` only becomes true once onstart has fired
    if (!this.enabled && this.speaking) {
      this.stop();
    }
+    
    return this.enabled;
  }
  
  /**
-   * Speak the given text
+   * Set the speech rate/speed
+   * @param {number} speed - Speed multiplier (0.1 to 2.0)
   */
-  speak(text) {
-    if (!this.enabled || !this.browserSupport) return;
-    
-    // Add to queue
-    this.queue.push(text);
-    
-    // If not already speaking, start processing queue
-    if (!this.speaking) {
-      this.processQueue();
+  setSpeed(speed) {
+    const rate = Number(speed);
+    // NaN passes straight through Math.max/Math.min; keep the previous speed rather than store it.
+    if (!Number.isNaN(rate)) this.currentSpeed = Math.max(0.1, Math.min(2.0, rate));
+    if (this.utterance && this.speaking) {
+      this.stop(); // a running utterance keeps its rate; its text is not stored, so it is not replayed
    }
  }
  
  /**
-   * Process the speech queue
+   * Process text for better speech synthesis
+   * @param {string} text - Text to process
+   * @returns {string} - Processed text
   */
-  processQueue() {
-    if (this.queue.length === 0 || this.speaking) return;
+  processTextForSpeech(text) {
+    // One pass per construct, in this order: **bold**, *italic*, [label](url), then <tags>.
+    const spoken = text
+      .replace(/\*\*([^*]+)\*\*/g, '$1') // **bold** -> bold
+      .replace(/\*([^*]+)\*/g, '$1') // *italic* -> italic
    
-    this.speaking = true;
-    const text = this.queue.shift();
+      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // [label](url) -> label
+      .replace(/<[^>]+>/g, ''); // drop anything shaped like <...>, e.g. HTML tags
    
-    try {
-      this.utterance = new SpeechSynthesisUtterance(text);
+    return spoken;
+  }
  
-      // Configure speech options
-      this.utterance.rate = 1.0;  // Speech rate (0.1 to 10)
-      this.utterance.pitch = 1.0; // Speech pitch (0 to 2)
+  /**
+   * Apply this handler's voice settings (pitch 1.0, current speed, full volume) to an utterance; no pauses are added
+   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
+   */
+  addPauses(utterance) {
+    // Despite the method name, the text is left untouched and no pause is inserted:
+    // only the pitch, rate and volume properties of the utterance are assigned.
+    utterance.pitch = 1.0;
+    utterance.rate = this.currentSpeed;
    
-      // When speech ends, process the next item
-      this.utterance.onend = () => {
-        this.speaking = false;
-        this.processQueue();
-      };
+    // Volume is assigned only when the object exposes a `volume` property
+    if ('volume' in utterance) utterance.volume = 1.0;
+  }
  
-      // If speech is interrupted or errors
-      this.utterance.onerror = (event) => {
-        console.error('TTS error:', event.error);
-        this.speaking = false;
-        this.processQueue();
-      };
+  /**
+   * Speak the provided text, replacing any speech that is in progress or still queued
+   * @param {string} text - Text to be spoken; markdown and tags are stripped first
+   * @param {function} onEndCallback - Called when this utterance ends or errors, including when it is cut off
+   */
+  speak(text, onEndCallback = null) {
+    if (!this.synth || !this.enabled || !text) return;
    
-      this.synthesis.speak(this.utterance);
-    } catch (error) {
-      console.error('TTS error:', error);
+    // `speaking` only flips in onstart, so also ask the engine: a second speak() issued
+    // before the first one starts would otherwise queue behind it instead of replacing it.
+    if (this.speaking || this.synth.speaking || this.synth.pending) {
+      this.stop();
+    }
+    
+    const processedText = this.processTextForSpeech(text);
+    
+    // Local reference so the handlers can tell whether they still belong to the current speech
+    const utterance = new SpeechSynthesisUtterance(processedText);
+    this.utterance = utterance;
+    if (this.preferredVoice) utterance.voice = this.preferredVoice;
+    this.addPauses(utterance); // sets pitch, rate (currentSpeed) and volume
+    
+    utterance.onstart = () => {
+      if (this.utterance === utterance) this.speaking = true;
+    };
+    
+    // A cancelled utterance reports its end asynchronously, possibly after a newer speak()
+    // has begun; only the utterance that still owns the state may reset it.
+    const settle = () => {
+      if (this.utterance !== utterance) return;
      this.speaking = false;
-      this.processQueue();
-    }
+    };
+    
+    utterance.onend = () => {
+      settle();
+      if (onEndCallback) onEndCallback();
+    };
+    
+    utterance.onerror = (event) => {
+      // "interrupted"/"canceled" only mean cancel() cut the speech off; that is not a failure
+      const deliberate = event.error === 'interrupted' || event.error === 'canceled';
+      if (!deliberate) console.error("Speech synthesis error:", event);
+      settle();
+      if (onEndCallback) onEndCallback();
+    };
+    
+    this.synth.speak(utterance);
  }
  
  /**
-   * Stop current speech
+   * Pause the current speech
+   */
+  pause() {
+    if (this.synth && this.speaking) { // nothing to pause otherwise
+      this.synth.pause();
+      this.paused = true;
+    }
+  }
+  
+  /**
+   * Resume paused speech
+   */
+  resume() {
+    if (this.synth && this.paused) { // no-op unless pause() set the flag earlier
+      this.synth.resume();
+      this.paused = false;
+    }
+  }
+  
+  /**
+   * Stop the current speech
   */
  stop() {
-    if (this.synthesis && this.speaking) {
-      this.synthesis.cancel();
-    }
-    this.queue = [];
+    if (!this.synth) return;
+    // cancel() removes every utterance from the synthesis queue (Web Speech API), started or not
+    this.synth.cancel();
    this.speaking = false;
+    this.paused = false;
+    this.utterance = null;
  }
  
  /**
-   * Check if TTS is ready
+   * Report whether TTS is switched on (the `enabled` flag); says nothing about whether speech is playing
   */
-  isReady() {
-    return this.browserSupport;
+  isEnabled() {
+    return this.enabled;
+  }
+  
+  /**
+   * Report the `speaking` flag: true once an utterance has started, until it ends, errors or is stopped
+   */
+  isSpeaking() {
+    return this.speaking;
  }
 }

 // Create a global instance
-const tts = new TTSHandler();
+const ttsHandler = new TTSHandler();