Fix TTS handler to prevent SSML tags from being spoken as text
This commit is contained in:
+189
-64
@@ -1,106 +1,231 @@
|
|||||||
/**
 * Text-to-Speech Handler for AI Interactive Fiction
 * Enhanced version with improved voice selection and playback controls,
 * built on the browser's Web Speech API (window.speechSynthesis).
 */
class TTSHandler {
    /**
     * Set up default state and, when the Web Speech API is present, grab the
     * synthesizer, load the voice list and enable TTS.
     *
     * Safe to construct outside a browser: `window` is only touched after a
     * `typeof` check, in which case the handler simply stays disabled.
     */
    constructor() {
        this.enabled = false;
        this.speaking = false;
        this.paused = false;
        this.synth = null;
        this.utterance = null; // utterance currently queued or being spoken
        this.voiceCache = [];
        this.preferredVoice = null;
        this.audioCache = new Map(); // Cache for audio segments (nothing in this class populates it)
        this.currentSpeed = 1.0;

        // Initialize if speech synthesis is available
        if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
            this.synth = window.speechSynthesis;

            // Some browsers expose voices synchronously, others only after
            // the voiceschanged event fires - handle both.
            const initialVoices = this.synth.getVoices();
            if (initialVoices.length > 0) {
                this.voiceCache = initialVoices;
                this.selectPreferredVoice();
            }

            this.synth.onvoiceschanged = () => {
                this.voiceCache = this.synth.getVoices();
                this.selectPreferredVoice();
            };

            // Enable by default if available
            this.enabled = true;
        } else {
            console.warn("Text-to-speech functionality not available in this browser.");
        }
    }

    /**
     * Select the preferred voice from `this.voiceCache`.
     *
     * Order of preference: the first match from a fixed list of voice names,
     * then any voice whose language tag starts with "en", then the first
     * voice in the cache. Leaves `this.preferredVoice` untouched when the
     * cache is empty.
     */
    selectPreferredVoice() {
        // Prefer high-quality voices - ordered by preference
        const preferredVoiceNames = [
            "Google UK English Female",
            "Microsoft Hazel Desktop",
            "Microsoft Susan",
            "Daniel",
            "Karen"
        ];

        // Try to find one of our preferred voices
        for (const name of preferredVoiceNames) {
            const voice = this.voiceCache.find(v => v.name === name);
            if (voice) {
                this.preferredVoice = voice;
                return;
            }
        }

        // Fall back to any English voice if preferred not found
        const englishVoice = this.voiceCache.find(
            v => typeof v.lang === 'string' && v.lang.startsWith('en')
        );
        if (englishVoice) {
            this.preferredVoice = englishVoice;
            return;
        }

        // Last resort: use the first available voice
        if (this.voiceCache.length > 0) {
            this.preferredVoice = this.voiceCache[0];
        }
    }

    /**
     * Toggle TTS functionality on/off.
     *
     * @returns {boolean} The new enabled state; always false when speech
     *     synthesis is unavailable.
     */
    toggle() {
        if (!this.synth) return false;

        this.enabled = !this.enabled;

        // Stop any ongoing speech when disabling. `this.utterance` covers an
        // utterance that was queued but has not fired `onstart` yet.
        if (!this.enabled && (this.speaking || this.utterance)) {
            this.stop();
        }

        return this.enabled;
    }

    /**
     * Set the speech rate/speed.
     *
     * Non-finite values (NaN, undefined, Infinity) are ignored so that
     * `currentSpeed` always holds a usable rate. If the clamped value differs
     * from the current speed while an utterance is being spoken, playback is
     * stopped: the speed of an active utterance cannot be changed, and the
     * text is not restarted automatically.
     *
     * @param {number} speed - Speed multiplier (clamped to 0.1 - 2.0)
     */
    setSpeed(speed) {
        const requested = Number(speed);
        if (!Number.isFinite(requested)) return;

        const clamped = Math.max(0.1, Math.min(2.0, requested));
        if (clamped === this.currentSpeed) return; // nothing to change, keep speaking

        this.currentSpeed = clamped;

        if (this.utterance && this.speaking) {
            // Cannot change speed of active utterance; the caller has to
            // call speak() again to hear the text at the new speed.
            this.stop();
        }
    }

    /**
     * Process text for better speech synthesis: strips markdown emphasis,
     * markdown links and HTML/SSML tags so they are not read out literally.
     *
     * @param {string} text - Text to process (null/undefined yields '')
     * @returns {string} - Processed text
     */
    processTextForSpeech(text) {
        if (text === null || text === undefined) return '';
        let cleaned = String(text);

        // Remove markdown/formatting that would sound strange when read
        cleaned = cleaned.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
        cleaned = cleaned.replace(/\*([^*]+)\*/g, '$1'); // Italic
        cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // Links

        // Line-break style tags separate words, so swap them for a space
        // instead of gluing the neighbouring words together.
        cleaned = cleaned.replace(/<\s*(?:br|break)\b[^>]*>/gi, ' ');

        // Clean up any remaining HTML/SSML tags
        cleaned = cleaned.replace(/<[^>]+>/g, '');

        return cleaned;
    }

    /**
     * Apply the handler's playback parameters to an utterance.
     *
     * Despite the name, the text is not modified; only pitch, rate and
     * (when the property exists) volume are set.
     *
     * @param {SpeechSynthesisUtterance} utterance - The utterance to modify
     */
    addPauses(utterance) {
        utterance.pitch = 1.0;
        utterance.rate = this.currentSpeed;

        // Some TTS engines support these parameters
        if ('volume' in utterance) utterance.volume = 1.0;
    }

    /**
     * Speak the provided text, replacing anything already queued or playing.
     *
     * @param {string} text - Text to be spoken
     * @param {function} onEndCallback - Called once when this utterance ends
     *     or errors (including when it is cancelled by stop()).
     */
    speak(text, onEndCallback = null) {
        if (!this.synth || !this.enabled || !text) return;

        // Stop any existing speech. `this.speaking` only flips in `onstart`,
        // so also check for an utterance that is queued but not started.
        if (this.speaking || this.utterance) {
            this.stop();
        }

        // Create and configure the utterance
        const utterance = new SpeechSynthesisUtterance(this.processTextForSpeech(text));
        if (this.preferredVoice) {
            utterance.voice = this.preferredVoice;
        }
        this.addPauses(utterance); // sets rate, pitch and volume

        // Events from a cancelled/superseded utterance can arrive after a
        // newer one has been queued; they must not clobber the newer state.
        const isCurrent = () => this.utterance === utterance;

        let notified = false;
        const finish = () => {
            if (isCurrent()) {
                this.speaking = false;
                this.paused = false;
                this.utterance = null;
            }
            if (!notified) {
                notified = true;
                if (typeof onEndCallback === 'function') onEndCallback();
            }
        };

        // Set up event handlers
        utterance.onstart = () => {
            if (isCurrent()) this.speaking = true;
        };

        utterance.onend = finish;

        utterance.onerror = (event) => {
            // 'interrupted' / 'canceled' are the expected result of cancel(),
            // not real failures, so keep them out of the error log.
            const reason = event ? event.error : undefined;
            if (reason !== 'interrupted' && reason !== 'canceled') {
                console.error("Speech synthesis error:", event);
            }
            finish();
        };

        // Start speaking
        this.utterance = utterance;
        this.synth.speak(utterance);
    }

    /**
     * Pause the current speech. No-op unless something is being spoken.
     */
    pause() {
        if (!this.synth || !this.speaking) return;

        this.synth.pause();
        this.paused = true;
    }

    /**
     * Resume paused speech. No-op unless pause() was called before.
     */
    resume() {
        if (!this.synth || !this.paused) return;

        this.synth.resume();
        this.paused = false;
    }

    /**
     * Stop the current speech and clear the playback state.
     */
    stop() {
        if (!this.synth) return;

        this.synth.cancel();
        this.speaking = false;
        this.paused = false;
        this.utterance = null;
    }

    /**
     * Check if TTS is currently active/enabled
     * @returns {boolean}
     */
    isEnabled() {
        return this.enabled;
    }

    /**
     * Check if speech is currently in progress
     * @returns {boolean}
     */
    isSpeaking() {
        return this.speaking;
    }
}

// Create a global instance
const ttsHandler = new TTSHandler();
|
||||||
Reference in New Issue
Block a user