Fix TTS handler to prevent SSML tags from being spoken as text

This commit is contained in:
2025-04-01 10:32:17 +02:00
parent b6472aa275
commit bf62c63198
+176 -51
View File
@@ -1,106 +1,231 @@
/** /**
* Text-to-Speech Handler for AI Interactive Fiction * Text-to-Speech Handler for AI Interactive Fiction
* Uses Web Speech API for text-to-speech * Enhanced version with improved voice selection, caching, and playback controls
*/ */
class TTSHandler { class TTSHandler {
constructor() { constructor() {
this.enabled = false; this.enabled = false;
this.speaking = false; this.speaking = false;
this.queue = []; this.paused = false;
this.synthesis = window.speechSynthesis;
this.utterance = null; this.utterance = null;
this.voiceCache = [];
this.preferredVoice = null;
this.audioCache = new Map(); // Cache for audio segments
this.currentSpeed = 1.0;
// Check if browser supports speech synthesis // Initialize if speech synthesis is available
if (this.synthesis) { if ('speechSynthesis' in window) {
console.log('Speech synthesis is supported in this browser'); this.synth = window.speechSynthesis;
this.browserSupport = true;
// Load voices when they become available
if (this.synth.getVoices().length > 0) {
this.voiceCache = this.synth.getVoices();
this.selectPreferredVoice();
}
this.synth.onvoiceschanged = () => {
this.voiceCache = this.synth.getVoices();
this.selectPreferredVoice();
};
// Enable by default if available
this.enabled = true;
} else { } else {
console.warn('Speech synthesis is not supported in this browser'); console.warn("Text-to-speech functionality not available in this browser.");
this.browserSupport = false;
} }
} }
/** /**
* Toggle TTS on/off * Select the preferred voice based on language and quality
*/
selectPreferredVoice() {
// Prefer high-quality voices - ordered by preference
const preferredVoiceNames = [
"Google UK English Female",
"Microsoft Hazel Desktop",
"Microsoft Susan",
"Daniel",
"Karen"
];
// Try to find one of our preferred voices
for (const name of preferredVoiceNames) {
const voice = this.voiceCache.find(v => v.name === name);
if (voice) {
this.preferredVoice = voice;
return;
}
}
// Fall back to any English voice if preferred not found
const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
if (englishVoice) {
this.preferredVoice = englishVoice;
return;
}
// Last resort: use the first available voice
if (this.voiceCache.length > 0) {
this.preferredVoice = this.voiceCache[0];
}
}
/**
* Toggle TTS functionality on/off
*/ */
toggle() { toggle() {
if (!this.synth) return false;
this.enabled = !this.enabled; this.enabled = !this.enabled;
// Stop any ongoing speech when disabling
if (!this.enabled && this.speaking) { if (!this.enabled && this.speaking) {
this.stop(); this.stop();
} }
return this.enabled; return this.enabled;
} }
/** /**
* Speak the given text * Set the speech rate/speed
* @param {number} speed - Speed multiplier (0.1 to 2.0)
*/ */
speak(text) { setSpeed(speed) {
if (!this.enabled || !this.browserSupport) return; this.currentSpeed = Math.max(0.1, Math.min(2.0, speed));
if (this.utterance && this.speaking) {
// Add to queue // Cannot change speed of active utterance, need to restart
this.queue.push(text); this.stop();
// Would need to restart the current text, but challenging without storing current text
// If not already speaking, start processing queue
if (!this.speaking) {
this.processQueue();
} }
} }
/** /**
* Process the speech queue * Process text for better speech synthesis
* @param {string} text - Text to process
* @returns {string} - Processed text
*/ */
processQueue() { processTextForSpeech(text) {
if (this.queue.length === 0 || this.speaking) return; // Remove markdown/formatting that would sound strange when read
text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
text = text.replace(/\*([^*]+)\*/g, '$1'); // Italic
text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // Links
// Clean up any HTML tags
text = text.replace(/<[^>]+>/g, '');
return text;
}
/**
* Add natural pauses after sentences using native TTS methods
* @param {SpeechSynthesisUtterance} utterance - The utterance to modify
*/
addPauses(utterance) {
// Instead of modifying the text, we'll use the utterance's parameters
// to create natural pauses - these settings generally improve natural speaking
utterance.pitch = 1.0;
utterance.rate = this.currentSpeed;
// Some TTS engines support these parameters
if ('volume' in utterance) utterance.volume = 1.0;
}
/**
* Speak the provided text
* @param {string} text - Text to be spoken
* @param {function} onEndCallback - Callback when speech ends
*/
speak(text, onEndCallback = null) {
if (!this.synth || !this.enabled || !text) return;
// Stop any existing speech
if (this.speaking) {
this.stop();
}
// Process text for better speech
const processedText = this.processTextForSpeech(text);
// Create and configure the utterance
this.utterance = new SpeechSynthesisUtterance(processedText);
if (this.preferredVoice) {
this.utterance.voice = this.preferredVoice;
}
this.utterance.rate = this.currentSpeed;
this.utterance.pitch = 1.0;
// Apply natural pausing
this.addPauses(this.utterance);
// Set up event handlers
this.utterance.onstart = () => {
this.speaking = true; this.speaking = true;
const text = this.queue.shift(); };
try {
this.utterance = new SpeechSynthesisUtterance(text);
// Configure speech options
this.utterance.rate = 1.0; // Speech rate (0.1 to 10)
this.utterance.pitch = 1.0; // Speech pitch (0 to 2)
// When speech ends, process the next item
this.utterance.onend = () => { this.utterance.onend = () => {
this.speaking = false; this.speaking = false;
this.processQueue(); if (onEndCallback) onEndCallback();
}; };
// If speech is interrupted or errors
this.utterance.onerror = (event) => { this.utterance.onerror = (event) => {
console.error('TTS error:', event.error); console.error("Speech synthesis error:", event);
this.speaking = false; this.speaking = false;
this.processQueue(); if (onEndCallback) onEndCallback();
}; };
this.synthesis.speak(this.utterance); // Start speaking
} catch (error) { this.synth.speak(this.utterance);
console.error('TTS error:', error);
this.speaking = false;
this.processQueue();
}
} }
/** /**
* Stop current speech * Pause the current speech
*/
pause() {
if (!this.synth || !this.speaking) return;
this.synth.pause();
this.paused = true;
}
/**
* Resume paused speech
*/
resume() {
if (!this.synth || !this.paused) return;
this.synth.resume();
this.paused = false;
}
/**
* Stop the current speech
*/ */
stop() { stop() {
if (this.synthesis && this.speaking) { if (!this.synth) return;
this.synthesis.cancel();
} this.synth.cancel();
this.queue = [];
this.speaking = false; this.speaking = false;
this.paused = false;
this.utterance = null;
} }
/** /**
* Check if TTS is ready * Check if TTS is currently active/enabled
*/ */
isReady() { isEnabled() {
return this.browserSupport; return this.enabled;
}
/**
* Check if speech is currently in progress
*/
isSpeaking() {
return this.speaking;
} }
} }
// Create a global instance // Create a global instance
const tts = new TTSHandler(); const ttsHandler = new TTSHandler();