Fix TTS handler to support longer texts by splitting them into a queue of sentences
This commit is contained in:
+215
-51
@@ -13,6 +13,10 @@ class TTSHandler {
|
|||||||
this.preferredVoice = null;
|
this.preferredVoice = null;
|
||||||
this.audioCache = new Map(); // Cache for audio segments
|
this.audioCache = new Map(); // Cache for audio segments
|
||||||
this.currentSpeed = 1.0;
|
this.currentSpeed = 1.0;
|
||||||
|
this.hasUserActivation = false;
|
||||||
|
this.permissionError = false;
|
||||||
|
this.speakQueue = [];
|
||||||
|
this.isSpeakingFromQueue = false;
|
||||||
|
|
||||||
// Initialize if speech synthesis is available
|
// Initialize if speech synthesis is available
|
||||||
if ('speechSynthesis' in window) {
|
if ('speechSynthesis' in window) {
|
||||||
@@ -27,10 +31,25 @@ class TTSHandler {
|
|||||||
this.synth.onvoiceschanged = () => {
|
this.synth.onvoiceschanged = () => {
|
||||||
this.voiceCache = this.synth.getVoices();
|
this.voiceCache = this.synth.getVoices();
|
||||||
this.selectPreferredVoice();
|
this.selectPreferredVoice();
|
||||||
|
console.log("Voices loaded:", this.voiceCache.length);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Enable by default if available
|
// Disabled by default until user activates it
|
||||||
this.enabled = true;
|
this.enabled = false;
|
||||||
|
|
||||||
|
// Set up periodic check to detect and fix stuck speech
|
||||||
|
setInterval(() => {
|
||||||
|
// If we think we're speaking but the browser reports that it isn't, reset state
|
||||||
|
if (this.speaking && !this.synth.speaking && !this.isSpeakingFromQueue) {
|
||||||
|
console.log("Detected stuck speech state, resetting");
|
||||||
|
this.speaking = false;
|
||||||
|
|
||||||
|
// Try to continue the queue if there are more items
|
||||||
|
if (this.speakQueue.length > 0) {
|
||||||
|
this.processSpeakQueue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 1000);
|
||||||
} else {
|
} else {
|
||||||
console.warn("Text-to-speech functionality not available in this browser.");
|
console.warn("Text-to-speech functionality not available in this browser.");
|
||||||
}
|
}
|
||||||
@@ -49,11 +68,15 @@ class TTSHandler {
|
|||||||
"Karen"
|
"Karen"
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Debug: Print all available voices
|
||||||
|
console.log("Available voices:", this.voiceCache.map(v => v.name + " (" + v.lang + ")").join(", "));
|
||||||
|
|
||||||
// Try to find one of our preferred voices
|
// Try to find one of our preferred voices
|
||||||
for (const name of preferredVoiceNames) {
|
for (const name of preferredVoiceNames) {
|
||||||
const voice = this.voiceCache.find(v => v.name === name);
|
const voice = this.voiceCache.find(v => v.name === name);
|
||||||
if (voice) {
|
if (voice) {
|
||||||
this.preferredVoice = voice;
|
this.preferredVoice = voice;
|
||||||
|
console.log("Selected preferred voice:", name);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -62,28 +85,68 @@ class TTSHandler {
|
|||||||
const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
|
const englishVoice = this.voiceCache.find(v => v.lang.startsWith('en'));
|
||||||
if (englishVoice) {
|
if (englishVoice) {
|
||||||
this.preferredVoice = englishVoice;
|
this.preferredVoice = englishVoice;
|
||||||
|
console.log("Selected English voice:", englishVoice.name);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Last resort: use the first available voice
|
// Last resort: use the first available voice
|
||||||
if (this.voiceCache.length > 0) {
|
if (this.voiceCache.length > 0) {
|
||||||
this.preferredVoice = this.voiceCache[0];
|
this.preferredVoice = this.voiceCache[0];
|
||||||
|
console.log("Selected fallback voice:", this.voiceCache[0].name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Toggle TTS functionality on/off
|
* Toggle TTS functionality on/off
|
||||||
|
* @returns {boolean} New state of TTS (enabled/disabled)
|
||||||
*/
|
*/
|
||||||
toggle() {
|
toggle() {
|
||||||
if (!this.synth) return false;
|
if (!this.synth) return false;
|
||||||
|
|
||||||
|
// Set user activation flag when toggle is called
|
||||||
|
this.hasUserActivation = true;
|
||||||
|
|
||||||
|
// Clear permission error on toggle
|
||||||
|
this.permissionError = false;
|
||||||
|
|
||||||
this.enabled = !this.enabled;
|
this.enabled = !this.enabled;
|
||||||
|
console.log("TTS toggled:", this.enabled ? "ON" : "OFF");
|
||||||
|
|
||||||
// Stop any ongoing speech when disabling
|
// Stop any ongoing speech when disabling
|
||||||
if (!this.enabled && this.speaking) {
|
if (!this.enabled && this.speaking) {
|
||||||
this.stop();
|
this.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try a test utterance to request permissions
|
||||||
|
if (this.enabled) {
|
||||||
|
try {
|
||||||
|
// Reset any current utterance first
|
||||||
|
this.synth.cancel();
|
||||||
|
this.speakQueue = [];
|
||||||
|
this.isSpeakingFromQueue = false;
|
||||||
|
|
||||||
|
// Create a near-silent utterance to trigger permission request
|
||||||
|
const testUtterance = new SpeechSynthesisUtterance("Hello");
|
||||||
|
testUtterance.volume = 0.05; // Very quiet but not silent to ensure it works
|
||||||
|
testUtterance.rate = 1.0;
|
||||||
|
|
||||||
|
// Handle any errors that might occur
|
||||||
|
testUtterance.onerror = (event) => {
|
||||||
|
console.warn("Permission error for TTS:", event);
|
||||||
|
if (event.error === "not-allowed") {
|
||||||
|
this.permissionError = true;
|
||||||
|
this.enabled = false;
|
||||||
|
alert("Text-to-speech was blocked by your browser. Please allow speech in your browser settings.");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try to speak the test utterance
|
||||||
|
this.synth.speak(testUtterance);
|
||||||
|
} catch (e) {
|
||||||
|
console.error("Failed to initialize TTS:", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return this.enabled;
|
return this.enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -93,11 +156,6 @@ class TTSHandler {
|
|||||||
*/
|
*/
|
||||||
setSpeed(speed) {
|
setSpeed(speed) {
|
||||||
this.currentSpeed = Math.max(0.1, Math.min(2.0, speed));
|
this.currentSpeed = Math.max(0.1, Math.min(2.0, speed));
|
||||||
if (this.utterance && this.speaking) {
|
|
||||||
// Cannot change speed of active utterance, need to restart
|
|
||||||
this.stop();
|
|
||||||
// Would need to restart the current text, but challenging without storing current text
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -106,6 +164,8 @@ class TTSHandler {
|
|||||||
* @returns {string} - Processed text
|
* @returns {string} - Processed text
|
||||||
*/
|
*/
|
||||||
processTextForSpeech(text) {
|
processTextForSpeech(text) {
|
||||||
|
if (!text) return "";
|
||||||
|
|
||||||
// Remove markdown/formatting that would sound strange when read
|
// Remove markdown/formatting that would sound strange when read
|
||||||
text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
|
text = text.replace(/\*\*([^*]+)\*\*/g, '$1'); // Bold
|
||||||
text = text.replace(/\*([^*]+)\*/g, '$1'); // Italic
|
text = text.replace(/\*([^*]+)\*/g, '$1'); // Italic
|
||||||
@@ -118,66 +178,168 @@ class TTSHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add natural pauses after sentences using native TTS methods
|
* Split text into sentences for better speech handling
|
||||||
* @param {SpeechSynthesisUtterance} utterance - The utterance to modify
|
* @param {string} text - Text to split
|
||||||
|
* @returns {string[]} - Array of sentences
|
||||||
*/
|
*/
|
||||||
addPauses(utterance) {
|
splitIntoSentences(text) {
|
||||||
// Instead of modifying the text, we'll use the utterance's parameters
|
if (!text) return [];
|
||||||
// to create natural pauses - these settings generally improve natural speaking
|
|
||||||
utterance.pitch = 1.0;
|
|
||||||
utterance.rate = this.currentSpeed;
|
|
||||||
|
|
||||||
// Some TTS engines support these parameters
|
// Split by sentence terminators, keeping the terminator with the sentence
|
||||||
if ('volume' in utterance) utterance.volume = 1.0;
|
const sentenceRegex = /[^.!?]+[.!?]+/g;
|
||||||
|
const sentences = text.match(sentenceRegex) || [text];
|
||||||
|
|
||||||
|
// If we have very long sentences, break them up by commas too
|
||||||
|
return sentences.reduce((result, sentence) => {
|
||||||
|
if (sentence.length > 150 && sentence.includes(',')) {
|
||||||
|
// Split long sentences at commas
|
||||||
|
const parts = sentence.split(/,\s*/);
|
||||||
|
for (let i = 0; i < parts.length - 1; i++) {
|
||||||
|
result.push(parts[i] + ',');
|
||||||
|
}
|
||||||
|
result.push(parts[parts.length - 1]);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
result.push(sentence);
|
||||||
|
return result;
|
||||||
|
}, []);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Speak the provided text
|
* Speak a single utterance with proper configuration
|
||||||
|
* @param {string} text - Text to speak
|
||||||
|
* @param {function} onEndCallback - Callback to execute when finished
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
speakUtterance(text, onEndCallback) {
|
||||||
|
if (!text || text.trim() === '') {
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
this.processSpeakQueue();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const utterance = new SpeechSynthesisUtterance(text);
|
||||||
|
|
||||||
|
if (this.preferredVoice) {
|
||||||
|
utterance.voice = this.preferredVoice;
|
||||||
|
console.log("Using voice:", this.preferredVoice.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
utterance.rate = this.currentSpeed;
|
||||||
|
utterance.pitch = 1.0;
|
||||||
|
utterance.volume = 1.0;
|
||||||
|
|
||||||
|
utterance.onstart = () => {
|
||||||
|
this.speaking = true;
|
||||||
|
console.log("TTS started speaking:", text.substring(0, 30) + "...");
|
||||||
|
};
|
||||||
|
|
||||||
|
utterance.onend = () => {
|
||||||
|
console.log("TTS finished speaking utterance");
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
this.processSpeakQueue();
|
||||||
|
};
|
||||||
|
|
||||||
|
utterance.onerror = (event) => {
|
||||||
|
console.error("Speech synthesis error:", event);
|
||||||
|
if (event.error === "not-allowed") {
|
||||||
|
this.permissionError = true;
|
||||||
|
this.enabled = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
this.processSpeakQueue();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Actually speak
|
||||||
|
this.synth.speak(utterance);
|
||||||
|
|
||||||
|
// Workaround for Chrome bug where speech synthesis gets stuck
|
||||||
|
if (!this.synth.speaking) {
|
||||||
|
this.synth.pause();
|
||||||
|
this.synth.resume();
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
console.error("Error in speakUtterance:", e);
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
this.processSpeakQueue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process the next item in the speak queue
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
processSpeakQueue() {
|
||||||
|
if (this.speakQueue.length === 0) {
|
||||||
|
this.isSpeakingFromQueue = false;
|
||||||
|
this.speaking = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the browser is still speaking, retry shortly instead of starting now (prevents overlapping sentences)
|
||||||
|
if (this.synth.speaking) {
|
||||||
|
setTimeout(() => this.processSpeakQueue(), 100);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isSpeakingFromQueue = true;
|
||||||
|
const queueItem = this.speakQueue.shift();
|
||||||
|
|
||||||
|
console.log(`Speaking queue item (${this.speakQueue.length} remaining):`, queueItem.text.substring(0, 30) + "...");
|
||||||
|
|
||||||
|
this.speakUtterance(queueItem.text, queueItem.callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Speak the provided text by queueing sentences
|
||||||
* @param {string} text - Text to be spoken
|
* @param {string} text - Text to be spoken
|
||||||
* @param {function} onEndCallback - Callback when speech ends
|
* @param {function} onEndCallback - Callback when all speech ends
|
||||||
*/
|
*/
|
||||||
speak(text, onEndCallback = null) {
|
speak(text, onEndCallback = null) {
|
||||||
if (!this.synth || !this.enabled || !text) return;
|
if (!this.synth || !this.enabled || !text) {
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Stop any existing speech
|
// Don't attempt to speak if there's been a permission error
|
||||||
if (this.speaking) {
|
if (this.permissionError) {
|
||||||
this.stop();
|
console.warn("Not attempting to speak due to permission error");
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't attempt to speak without user activation
|
||||||
|
if (!this.hasUserActivation) {
|
||||||
|
console.warn("Not attempting to speak because there hasn't been user interaction yet");
|
||||||
|
if (onEndCallback) onEndCallback();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process text for better speech
|
// Process text for better speech
|
||||||
const processedText = this.processTextForSpeech(text);
|
const processedText = this.processTextForSpeech(text);
|
||||||
|
console.log("TTS attempting to speak:", processedText.substring(0, 50) + "...");
|
||||||
|
|
||||||
// Create and configure the utterance
|
// Stop any existing speech
|
||||||
this.utterance = new SpeechSynthesisUtterance(processedText);
|
this.stop();
|
||||||
|
|
||||||
if (this.preferredVoice) {
|
// Split into sentences for better handling
|
||||||
this.utterance.voice = this.preferredVoice;
|
const sentences = this.splitIntoSentences(processedText);
|
||||||
|
|
||||||
|
// Last sentence gets the callback
|
||||||
|
for (let i = 0; i < sentences.length; i++) {
|
||||||
|
this.speakQueue.push({
|
||||||
|
text: sentences[i],
|
||||||
|
callback: i === sentences.length - 1 ? onEndCallback : null
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
this.utterance.rate = this.currentSpeed;
|
// Start processing the queue if not already processing
|
||||||
this.utterance.pitch = 1.0;
|
if (!this.isSpeakingFromQueue) {
|
||||||
|
this.processSpeakQueue();
|
||||||
// Apply natural pausing
|
}
|
||||||
this.addPauses(this.utterance);
|
|
||||||
|
|
||||||
// Set up event handlers
|
|
||||||
this.utterance.onstart = () => {
|
|
||||||
this.speaking = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
this.utterance.onend = () => {
|
|
||||||
this.speaking = false;
|
|
||||||
if (onEndCallback) onEndCallback();
|
|
||||||
};
|
|
||||||
|
|
||||||
this.utterance.onerror = (event) => {
|
|
||||||
console.error("Speech synthesis error:", event);
|
|
||||||
this.speaking = false;
|
|
||||||
if (onEndCallback) onEndCallback();
|
|
||||||
};
|
|
||||||
|
|
||||||
// Start speaking
|
|
||||||
this.synth.speak(this.utterance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -210,13 +372,15 @@ class TTSHandler {
|
|||||||
this.speaking = false;
|
this.speaking = false;
|
||||||
this.paused = false;
|
this.paused = false;
|
||||||
this.utterance = null;
|
this.utterance = null;
|
||||||
|
this.speakQueue = [];
|
||||||
|
this.isSpeakingFromQueue = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if TTS is currently active/enabled
|
* Check if TTS is currently active/enabled
|
||||||
*/
|
*/
|
||||||
isEnabled() {
|
isEnabled() {
|
||||||
return this.enabled;
|
return this.enabled && !this.permissionError;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user