// ai.interactive.fiction/public/js/tts-handler.js

/**
 * Text-to-Speech Handler for AI Interactive Fiction
 * Enhanced version with improved voice selection, caching, and playback controls
 */

/**
 * Thin wrapper around the browser Web Speech API (`window.speechSynthesis`).
 *
 * Tracks three pieces of state on the instance:
 *  - `enabled`  : whether `speak()` is allowed to produce speech
 *  - `speaking` : set when the current utterance fires `start`, cleared on end/error/stop
 *  - `paused`   : set by `pause()`, cleared by `resume()`/`stop()`
 *
 * When speech synthesis is unavailable the instance is still constructed, but
 * `synth` is left undefined and every method becomes a no-op.
 */
class TTSHandler {
  constructor() {
    this.enabled = false;
    this.speaking = false;
    this.paused = false;
    this.utterance = null;
    this.voiceCache = [];
    this.preferredVoice = null;
    this.audioCache = new Map(); // Cache for audio segments (not read or written by this class)
    this.currentSpeed = 1.0;

    // `typeof` guard: a bare `window` reference throws where no `window` global exists.
    if (typeof window === 'undefined' || !('speechSynthesis' in window)) {
      console.warn("Text-to-speech functionality not available in this browser.");
      return;
    }

    this.synth = window.speechSynthesis;

    const refreshVoices = () => {
      this.voiceCache = Array.from(this.synth.getVoices() || []);
      this.selectPreferredVoice();
    };

    // The voice list may already be populated, or may only arrive later via
    // the `voiceschanged` event - handle both.
    refreshVoices();
    this.synth.onvoiceschanged = refreshVoices;

    // Enable by default if available
    this.enabled = true;
  }

  /**
   * Pick `preferredVoice` from `voiceCache`.
   *
   * Order of preference: the first match from a fixed list of voice names,
   * then any voice whose `lang` starts with "en", then the first voice in the
   * cache. If the cache is empty, `preferredVoice` is left unchanged.
   */
  selectPreferredVoice() {
    // Ordered by preference
    const preferredVoiceNames = [
      "Google UK English Female",
      "Microsoft Hazel Desktop",
      "Microsoft Susan",
      "Daniel",
      "Karen"
    ];

    const voices = this.voiceCache;

    for (const name of preferredVoiceNames) {
      const namedVoice = voices.find(v => v.name === name);
      if (namedVoice) {
        this.preferredVoice = namedVoice;
        return;
      }
    }

    // Guard the type so a voice entry without a string `lang` cannot throw here.
    const englishVoice = voices.find(
      v => typeof v.lang === 'string' && v.lang.startsWith('en')
    );
    if (englishVoice) {
      this.preferredVoice = englishVoice;
      return;
    }

    // Last resort: use the first available voice
    if (voices.length > 0) {
      this.preferredVoice = voices[0];
    }
  }

  /**
   * Toggle TTS functionality on/off.
   * @returns {boolean} The new enabled state (always false when synthesis is unavailable)
   */
  toggle() {
    if (!this.synth) return false;

    this.enabled = !this.enabled;

    // When disabling, cancel unconditionally: `speaking` only becomes true once
    // the `start` event fires, so an utterance can be queued while it is false.
    if (!this.enabled) {
      this.stop();
    }

    return this.enabled;
  }

  /**
   * Set the speech rate/speed.
   *
   * Non-finite input (NaN, undefined, Infinity) is ignored so that
   * `currentSpeed` always holds a usable rate. If the clamped value differs
   * from the current one while an utterance is being spoken, that utterance is
   * stopped; it is NOT restarted, the caller must call `speak()` again.
   *
   * @param {number} speed - Speed multiplier, clamped to 0.1 - 2.0
   */
  setSpeed(speed) {
    const requested = Number(speed);
    if (!Number.isFinite(requested)) return;

    const clamped = Math.max(0.1, Math.min(2.0, requested));
    if (clamped === this.currentSpeed) return; // nothing changed; don't interrupt speech

    this.currentSpeed = clamped;

    if (this.utterance && this.speaking) {
      // The rate of an utterance that is already playing cannot be changed,
      // so the active utterance is cancelled.
      this.stop();
    }
  }

  /**
   * Strip formatting that would sound strange when read aloud.
   *
   * Removes `**bold**` / `*italic*` markers, replaces `[label](url)` links with
   * their label, and deletes anything that looks like an HTML tag.
   *
   * @param {string} text - Text to process (other values are converted to a string)
   * @returns {string} - Processed text
   */
  processTextForSpeech(text) {
    return String(text)
      .replace(/\*\*([^*]+)\*\*/g, '$1')          // Bold
      .replace(/\*([^*]+)\*/g, '$1')              // Italic
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')    // Links
      .replace(/<[^>]+>/g, '');                   // HTML tags
  }

  /**
   * Apply the handler's playback parameters to an utterance.
   *
   * Despite the name, this does not insert pauses into the text: it sets
   * `pitch` to 1.0, `rate` to `currentSpeed`, and `volume` to 1.0 when the
   * utterance exposes a `volume` property.
   *
   * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
   */
  addPauses(utterance) {
    utterance.pitch = 1.0;
    utterance.rate = this.currentSpeed;

    if ('volume' in utterance) utterance.volume = 1.0;
  }

  /**
   * Speak the provided text, replacing anything currently spoken or queued.
   *
   * @param {string} text - Text to be spoken
   * @param {function} onEndCallback - Called once when this utterance ends or
   *   errors (including when it is cancelled by `stop()` or a later `speak()`)
   */
  speak(text, onEndCallback = null) {
    if (!this.synth || !this.enabled || !text) return;

    // `this.speaking` lags behind the engine (it is set by the `start` event),
    // so also consult the engine's own flags before deciding the queue is idle.
    if (this.speaking || this.paused || this.synth.speaking || this.synth.pending) {
      this.stop();
    }

    const utterance = new SpeechSynthesisUtterance(this.processTextForSpeech(text));
    this.utterance = utterance;

    if (this.preferredVoice) {
      utterance.voice = this.preferredVoice;
    }

    // Sets rate, pitch and volume
    this.addPauses(utterance);

    // Events from an utterance that has since been replaced or stopped must
    // not overwrite the state that belongs to the current one.
    const isCurrent = () => this.utterance === utterance;

    const finish = () => {
      if (isCurrent()) {
        this.speaking = false;
      }
      if (typeof onEndCallback === 'function') onEndCallback();
    };

    utterance.onstart = () => {
      if (isCurrent()) {
        this.speaking = true;
      }
    };

    utterance.onend = finish;

    utterance.onerror = (event) => {
      // "interrupted"/"canceled" are what cancel() reports; they are the
      // expected outcome of stop(), not failures worth logging.
      const reason = event ? event.error : undefined;
      if (reason !== 'interrupted' && reason !== 'canceled') {
        console.error("Speech synthesis error:", event);
      }
      finish();
    };

    this.synth.speak(utterance);
  }

  /**
   * Pause the current speech. Does nothing unless an utterance has started.
   */
  pause() {
    if (!this.synth || !this.speaking) return;

    this.synth.pause();
    this.paused = true;
  }

  /**
   * Resume speech previously paused through `pause()`.
   */
  resume() {
    if (!this.synth || !this.paused) return;

    this.synth.resume();
    this.paused = false;
  }

  /**
   * Stop the current speech and clear the engine's queue.
   */
  stop() {
    if (!this.synth) return;

    this.synth.cancel();

    // cancel() does not change the engine's paused state. Without this resume()
    // the engine would stay paused and the next speak() would produce no sound
    // even though `this.paused` reads false.
    if (this.paused || this.synth.paused) {
      this.synth.resume();
    }

    this.speaking = false;
    this.paused = false;
    this.utterance = null;
  }

  /**
   * Check if TTS is currently active/enabled
   * @returns {boolean}
   */
  isEnabled() {
    return this.enabled;
  }

  /**
   * Check if speech is currently in progress
   * @returns {boolean}
   */
  isSpeaking() {
    return this.speaking;
  }
}

// Create the single shared TTSHandler instance as soon as this script is evaluated.
// NOTE(review): a top-level `const` is only visible to other scripts when this file
// is loaded as a classic <script>, not as an ES module - confirm how it is included.
const ttsHandler = new TTSHandler();