// ai.interactive.fiction/public/js/tts-handler.js

/**
 * Text-to-Speech Handler for AI Interactive Fiction
 * Enhanced version with improved voice selection, caching, and playback controls
 */

/**
 * Queue-based wrapper around the browser's `window.speechSynthesis`.
 *
 * Text passed to `speak()` is stripped of markdown/HTML, split into
 * sentence-sized chunks and spoken one chunk at a time. The handler starts
 * disabled; `toggle()` enables it and records that the user has interacted.
 */
class TTSHandler {
  /**
   * Initialise state and, when `speechSynthesis` exists on `window`, hook up
   * voice loading and a once-per-second state check.
   */
  constructor() {
    this.enabled = false;
    this.speaking = false;
    this.paused = false;
    this.utterance = null; // Utterance most recently handed to the synthesizer
    this.voiceCache = [];
    this.preferredVoice = null;
    this.audioCache = new Map(); // Cache for audio segments (not used by any method in this class)
    this.currentSpeed = 1.0;
    this.hasUserActivation = false;
    this.permissionError = false;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;

    // Incremented by stop(). Utterance handlers remember the value they were
    // created under, so handlers of cancelled utterances can tell they are
    // stale and must not advance a newer queue.
    this.queueGeneration = 0;
    // Handle of the single pending "synth still busy, retry" timer, or null.
    this.queueRetryTimer = null;

    if ('speechSynthesis' in window) {
      this.synth = window.speechSynthesis;

      // Voices may already be available synchronously...
      const initialVoices = this.synth.getVoices();
      if (initialVoices.length > 0) {
        this.voiceCache = initialVoices;
        this.selectPreferredVoice();
      }

      // ...or arrive later via the voiceschanged event.
      this.synth.onvoiceschanged = () => {
        this.voiceCache = this.synth.getVoices();
        this.selectPreferredVoice();
        console.log("Voices loaded:", this.voiceCache.length);
      };

      // Disabled by default until user activates it
      this.enabled = false;

      // Periodic check: if our flag says "speaking" while the synthesizer
      // reports idle and no queue run is active, clear the flag and restart
      // the queue if items are still waiting.
      setInterval(() => {
        if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
          console.log("Detected stuck speech state, resetting");
          this.speaking = false;

          if (this.speakQueue.length > 0) {
            this.processSpeakQueue();
          }
        }
      }, 1000);
    } else {
      console.warn("Text-to-speech functionality not available in this browser.");
    }
  }

  /**
   * Pick `preferredVoice` from `voiceCache`: first match from a fixed list of
   * voice names, else the first voice whose `lang` starts with "en", else the
   * first voice available. Leaves `preferredVoice` untouched when the cache
   * is empty.
   */
  selectPreferredVoice() {
    // Ordered by preference
    const preferredVoiceNames = [
      "Google UK English Female",
      "Microsoft Hazel Desktop",
      "Microsoft Susan",
      "Daniel",
      "Karen"
    ];

    // Debug: Print all available voices
    console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", "));

    for (const name of preferredVoiceNames) {
      const voice = this.voiceCache.find(v => v.name === name);
      if (voice) {
        this.preferredVoice = voice;
        console.log("Selected preferred voice:", name);
        return;
      }
    }

    const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
    if (englishVoice) {
      this.preferredVoice = englishVoice;
      console.log("Selected English voice:", englishVoice.name);
      return;
    }

    if (this.voiceCache.length > 0) {
      this.preferredVoice = this.voiceCache[0];
      console.log("Selected fallback voice:", this.voiceCache[0].name);
    }
  }

  /**
   * Toggle TTS functionality on/off.
   *
   * Calling this marks the handler as user-activated and clears any earlier
   * permission error. Disabling always cancels current and queued speech.
   * Enabling speaks a very quiet test utterance; if that fails with
   * "not-allowed" the handler disables itself again and alerts the user.
   *
   * @returns {boolean} New state of TTS (enabled/disabled); always false when
   *   speech synthesis is unavailable.
   */
  toggle() {
    if (!this.synth) return false;

    this.hasUserActivation = true;
    this.permissionError = false;

    this.enabled = !this.enabled;
    console.log("TTS toggled:", this.enabled ? "ON" : "OFF");

    if (!this.enabled) {
      // Stop unconditionally: an utterance may be queued in the synthesizer
      // before its start event has set `speaking`.
      this.stop();
      return this.enabled;
    }

    try {
      // Start from a clean slate before the test utterance
      this.stop();

      const testUtterance = new SpeechSynthesisUtterance("Hello");
      testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
      testUtterance.rate = 1.0;

      testUtterance.onerror = (event) => {
        console.warn("Permission error for TTS:", event);
        if (event.error === "not-allowed") {
          this.permissionError = true;
          this.enabled = false;
          alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
        }
      };

      this.synth.speak(testUtterance);
    } catch (e) {
      console.error("Failed to initialize TTS:", e);
    }

    return this.enabled;
  }

  /**
   * Set the speech rate/speed used for subsequent utterances.
   * Values are clamped to [0.1, 2.0]; input that is not a number (NaN after
   * numeric conversion) is ignored so the stored rate always stays usable.
   *
   * @param {number} speed - Speed multiplier (0.1 to 2.0)
   */
  setSpeed(speed) {
    const requested = Number(speed);
    if (Number.isNaN(requested)) {
      console.warn("Ignoring invalid TTS speed:", speed);
      return;
    }
    this.currentSpeed = Math.max(0.1, Math.min(2.0, requested));
  }

  /**
   * Strip formatting that would sound strange when read aloud: `**bold**`,
   * `*italic*`, `[label](url)` links (label kept) and anything that looks
   * like an HTML tag.
   *
   * @param {string} text - Text to process
   * @returns {string} Processed text; "" for falsy input
   */
  processTextForSpeech(text) {
    if (!text) return "";

    // Coerce so that a truthy non-string (e.g. a number) does not throw
    return String(text)
      .replace(/\*\*([^*]+)\*\*/g, '$1')        // Bold
      .replace(/\*([^*]+)\*/g, '$1')            // Italic
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')  // Links
      .replace(/<[^>]+>/g, '');                 // HTML tags
  }

  /**
   * Split text into chunks for speaking.
   *
   * Chunks end at runs of `.`, `!` or `?`; trailing text without a terminator
   * is kept as a final chunk. Chunks longer than 150 characters that contain
   * commas are further split at the commas (each part keeps its comma).
   * Chunks are trimmed and empty ones are dropped.
   *
   * @param {string} text - Text to split
   * @returns {string[]} Array of non-empty chunks, in order
   */
  splitIntoSentences(text) {
    if (!text) return [];

    // `[.!?]*` (rather than `+`) keeps a final fragment with no terminator
    const sentenceRegex = /[^.!?]+[.!?]*/g;
    const sentences = (text.match(sentenceRegex) || [text])
      .map(sentence => sentence.trim())
      .filter(sentence => sentence !== '');

    const result = [];
    for (const sentence of sentences) {
      if (sentence.length > 150 && sentence.includes(',')) {
        const parts = sentence.split(/,\s*/);
        parts.forEach((part, index) => {
          if (part.trim() === '') return;
          const isLast = index === parts.length - 1;
          result.push(isLast ? part : part + ',');
        });
      } else {
        result.push(sentence);
      }
    }
    return result;
  }

  /**
   * Speak a single utterance with the current voice and rate.
   *
   * When the utterance ends, errors, is empty, or cannot be started,
   * `onEndCallback` is invoked exactly once. The queue is then advanced only
   * if `stop()` has not been called since this utterance was created, so
   * a cancelled utterance cannot consume items from a newer queue.
   *
   * @param {string} text - Text to speak
   * @param {function} onEndCallback - Callback to execute when finished
   * @private
   */
  speakUtterance(text, onEndCallback) {
    const generation = this.queueGeneration;
    let finished = false;

    const finish = () => {
      if (finished) return;
      finished = true;
      if (onEndCallback) onEndCallback();
      if (generation === this.queueGeneration) {
        this.processSpeakQueue();
      }
    };

    if (!text || text.trim() === '') {
      finish();
      return;
    }

    try {
      const utterance = new SpeechSynthesisUtterance(text);

      if (this.preferredVoice) {
        utterance.voice = this.preferredVoice;
        console.log("Using voice:", this.preferredVoice.name);
      }

      utterance.rate = this.currentSpeed;
      utterance.pitch = 1.0;
      utterance.volume = 1.0;

      utterance.onstart = () => {
        // A start event arriving after stop() must not flip the flag back on
        if (generation !== this.queueGeneration) return;
        this.speaking = true;
        console.log("TTS started speaking:", text.substring(0, 30) + "...");
      };

      utterance.onend = () => {
        console.log("TTS finished speaking utterance");
        finish();
      };

      utterance.onerror = (event) => {
        console.error("Speech synthesis error:", event);
        if (event.error === "not-allowed") {
          this.permissionError = true;
          this.enabled = false;
        }
        finish();
      };

      // Hold a reference while the utterance is active.
      // NOTE(review): some browsers are reported to drop end events for
      // utterances that become unreachable — confirm whether that applies.
      this.utterance = utterance;

      this.synth.speak(utterance);

      // Workaround for Chrome bug where speech synthesis gets stuck
      if (!this.synth.speaking) {
        this.synth.pause();
        this.synth.resume();
      }
    } catch (e) {
      console.error("Error in speakUtterance:", e);
      finish();
    }
  }

  /**
   * Speak the next queued item, or mark the handler idle when the queue is
   * empty. If the synthesizer is still busy, retries after 100 ms using a
   * single tracked timer (cleared by `stop()`).
   * @private
   */
  processSpeakQueue() {
    if (this.speakQueue.length === 0) {
      this.isSpeakingFromQueue = false;
      this.speaking = false;
      return;
    }

    // Prevent overlapping sentences: wait until the synthesizer is idle
    if (this.synth.speaking) {
      if (this.queueRetryTimer === null) {
        this.queueRetryTimer = setTimeout(() => {
          this.queueRetryTimer = null;
          this.processSpeakQueue();
        }, 100);
      }
      return;
    }

    this.isSpeakingFromQueue = true;
    const queueItem = this.speakQueue.shift();

    console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "...");

    this.speakUtterance(queueItem.text, queueItem.callback);
  }

  /**
   * Speak the provided text, replacing anything currently being spoken.
   *
   * `onEndCallback` is invoked immediately when nothing will be spoken
   * (synthesis unavailable, handler disabled, empty text, permission error,
   * no user activation, or text that is empty after cleaning). Otherwise it
   * is attached to the last chunk and runs when that chunk finishes.
   *
   * @param {string} text - Text to be spoken
   * @param {function} onEndCallback - Callback when all speech ends
   */
  speak(text, onEndCallback = null) {
    if (!this.synth || !this.enabled || !text) {
      if (onEndCallback) onEndCallback();
      return;
    }

    if (this.permissionError) {
      console.warn("Not attempting to speak due to permission error");
      if (onEndCallback) onEndCallback();
      return;
    }

    if (!this.hasUserActivation) {
      console.warn("Not attempting to speak because there hasn't been user interaction yet");
      if (onEndCallback) onEndCallback();
      return;
    }

    const processedText = this.processTextForSpeech(text);
    console.log("TTS attempting to speak:", processedText.substring(0, 50) + "...");

    // Stop any existing speech (also clears the queue)
    this.stop();

    const sentences = this.splitIntoSentences(processedText);

    // Nothing speakable left after cleaning: still honour the callback
    if (sentences.length === 0) {
      if (onEndCallback) onEndCallback();
      return;
    }

    // Only the last chunk carries the caller's callback
    const lastIndex = sentences.length - 1;
    sentences.forEach((sentence, index) => {
      this.speakQueue.push({
        text: sentence,
        callback: index === lastIndex ? onEndCallback : null
      });
    });

    // stop() above left the queue idle, so start it
    this.processSpeakQueue();
  }

  /**
   * Pause the current speech. No-op unless the handler believes it is speaking.
   */
  pause() {
    if (!this.synth || !this.speaking) return;

    this.synth.pause();
    this.paused = true;
  }

  /**
   * Resume paused speech. No-op unless `pause()` took effect earlier.
   */
  resume() {
    if (!this.synth || !this.paused) return;

    this.synth.resume();
    this.paused = false;
  }

  /**
   * Stop the current speech, drop all queued chunks and reset state flags.
   * Also invalidates handlers of utterances created before this call and
   * cancels any pending queue retry.
   */
  stop() {
    if (!this.synth) return;

    this.queueGeneration += 1;
    if (this.queueRetryTimer !== null) {
      clearTimeout(this.queueRetryTimer);
      this.queueRetryTimer = null;
    }

    this.synth.cancel();
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.speakQueue = [];
    this.isSpeakingFromQueue = false;
  }

  /**
   * Check if TTS is currently active/enabled.
   * @returns {boolean} True when enabled and no permission error is recorded
   */
  isEnabled() {
    return this.enabled && !this.permissionError;
  }

  /**
   * Check if speech is currently in progress.
   * @returns {boolean} The handler's own `speaking` flag
   */
  isSpeaking() {
    return this.speaking;
  }
}

// Create one TTSHandler as soon as this script is evaluated. The constructor
// reads `window` immediately, so `window` must exist when this line runs.
// NOTE(review): presumably other scripts use this through the `ttsHandler`
// binding, which is only shared if this file is loaded as a classic
// (non-module) script — confirm against the page's script tags.
const ttsHandler = new TTSHandler();