Files
ai.interactive.fiction/public/js/browser-tts-handler.js

790 lines
29 KiB
JavaScript

/**
* BrowserTTSHandler for AI Interactive Fiction
* Implementation using the browser's Web Speech API
*/
import { TTSHandler } from './tts-handler.js';
import { moduleRegistry } from './module-registry.js';
/**
 * TTS handler backed by the browser's Web Speech API (`speechSynthesis`).
 *
 * Speech is produced by handing a `SpeechSynthesisUtterance` straight to
 * `speechSynthesis.speak()`. The handler relies on the `TTSHandler` base class
 * for `bindMethods`, `changeState` and `dispatchEvent`, and on the module
 * registry for the `localization` and `persistence-manager` modules.
 */
export class BrowserTTSHandler extends TTSHandler {
  constructor() {
    super();
    this.id = 'browser';
    this.name = 'Browser TTS Handler';
    // Options applied to every utterance created by this handler.
    this.voiceOptions = {
      voice: null, // SpeechSynthesisVoice, chosen during initialization
      rate: 1.0,
      pitch: 1.0,
      volume: 1.0
    };
    // State
    this.available = false;
    this.voices = [];
    this.currentUtterance = null;
    // Guards against registering the persistent `voiceschanged` listener twice.
    this.voicesChangedListenerAttached = false;
    // Modules looked up through the registry during initialization.
    this.dependencies = ['localization', 'persistence-manager'];
    this.bindMethods([
      'initialize',
      'speak',
      'speakPreloaded',
      'preloadSpeech',
      'stop',
      'isAvailable',
      'getId',
      'getVoices',
      'setVoiceOptions',
      'onVoicesChanged',
      'getModule'
    ]);
  }

  /**
   * Get a module from the registry
   * @param {string} moduleId - ID of the module to get
   * @returns {Object|null} - Whatever the registry returns for that ID
   */
  getModule(moduleId) {
    return moduleRegistry.getModule(moduleId);
  }

  /**
   * Initialize the browser TTS handler.
   *
   * Fails (returns false, state 'ERROR') when speech synthesis or one of the
   * required modules is missing. A missing or empty voice list does NOT fail
   * initialization, because browsers may deliver voices later.
   *
   * @param {Function} progressCallback - Optional `(percent, message)` callback
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize(progressCallback = null) {
    const report = (percent, message) => {
      if (progressCallback) {
        progressCallback(percent, message);
      }
    };
    try {
      report(10, 'Initializing Browser TTS');
      this.changeState('LOADING');

      if (!window.speechSynthesis) {
        console.warn('Browser TTS: Speech synthesis not available in this browser');
        report(100, 'Browser TTS not available');
        this.changeState('ERROR');
        return false;
      }
      report(30, 'Browser TTS supported');

      const localization = this.getModule('localization');
      const persistenceManager = this.getModule('persistence-manager');
      if (!localization) {
        console.error('Browser TTS: Required dependency \'localization\' not found');
        this.changeState('ERROR');
        return false;
      }
      if (!persistenceManager) {
        console.error('Browser TTS: Required dependency \'persistence-manager\' not found');
        this.changeState('ERROR');
        return false;
      }
      report(40, 'Browser TTS dependencies loaded');

      try {
        await this.loadVoices();
        console.log(`Browser TTS: Loaded ${this.voices.length} voices initially`);
      } catch (error) {
        console.warn('Browser TTS: Error loading voices initially:', error);
        this.voices = [];
      }
      // Voices that arrive after the initial load still need a voice selection.
      this.watchVoiceChanges();
      report(60, `Browser TTS loaded ${this.voices.length} voices`);

      try {
        this.applyStoredSpeechOptions(persistenceManager);
        const locale = localization.getLocale();
        console.log(`Browser TTS: Setting voice for locale: ${locale}`);
        await this.selectVoiceForLocale(locale, this.getPreferredVoiceName(persistenceManager));
        report(80, 'Browser TTS voice selected');
      } catch (error) {
        // Voice selection problems must not fail initialization.
        console.warn('Browser TTS: Error setting speech options:', error);
      }

      if (this.voices.length > 0 && !this.voiceOptions.voice) {
        console.warn('Browser TTS: No voice selected after initialization, trying fallback');
        this.voiceOptions.voice = this.voices[0];
      }

      // Marked available whenever speech synthesis exists, regardless of voice selection.
      this.available = true;
      this.isReady = true;
      report(100, 'Browser TTS initialized');
      this.changeState('FINISHED');
      return true;
    } catch (error) {
      console.error('Browser TTS: Initialization error:', error);
      report(100, `Browser TTS initialization failed - ${error.message}`);
      this.changeState('ERROR');
      return false;
    }
  }

  /**
   * Copy the stored speed/pitch/volume preferences into `voiceOptions`, which
   * is what utterances are built from. Unparseable values are ignored by
   * `setVoiceOptions`, leaving the current value in place.
   * @param {Object} persistenceManager - Module exposing `getPreference(category, key, fallback)`
   */
  applyStoredSpeechOptions(persistenceManager) {
    this.setVoiceOptions({
      rate: Number.parseFloat(persistenceManager.getPreference('tts', 'speed', 1.0)),
      pitch: Number.parseFloat(persistenceManager.getPreference('tts', 'pitch', 1.0)),
      volume: Number.parseFloat(persistenceManager.getPreference('tts', 'volume', 1.0))
    });
    // NOTE(review): earlier code wrote these values to `this.options`, which this
    // class never defines — presumably a base-class field. Mirror only if it exists.
    if (this.options && typeof this.options === 'object') {
      this.options.rate = this.voiceOptions.rate;
      this.options.pitch = this.voiceOptions.pitch;
      this.options.volume = this.voiceOptions.volume;
    }
  }

  /**
   * Read the stored voice preference. The handler-specific key wins; the
   * generic 'voice' key is consulted as a fallback.
   * @param {Object|null} persistenceManager - Persistence module, may be missing
   * @returns {string} - Preferred voice name/URI, or '' when none is stored
   */
  getPreferredVoiceName(persistenceManager) {
    if (!persistenceManager) {
      return '';
    }
    return persistenceManager.getPreference('tts', 'browser_voice', '') ||
      persistenceManager.getPreference('tts', 'voice', '') ||
      '';
  }

  /**
   * Register `onVoicesChanged` as a persistent `voiceschanged` listener (once).
   * Skipped when `speechSynthesis` does not expose `addEventListener`.
   */
  watchVoiceChanges() {
    if (this.voicesChangedListenerAttached) {
      return;
    }
    if (typeof speechSynthesis === 'undefined' || typeof speechSynthesis.addEventListener !== 'function') {
      return;
    }
    speechSynthesis.addEventListener('voiceschanged', () => {
      this.onVoicesChanged().catch((error) => {
        console.warn('Browser TTS: Error refreshing voices:', error);
      });
    });
    this.voicesChangedListenerAttached = true;
  }

  /**
   * Handle voices changed event: reload the voice list and re-run voice
   * selection for the current locale and stored preference.
   * @returns {Promise<void>}
   */
  async onVoicesChanged() {
    await this.loadVoices();
    const localization = this.getModule('localization');
    const persistenceManager = this.getModule('persistence-manager');
    const currentLocale = localization ? localization.getLocale() : 'en-us';
    await this.selectVoiceForLocale(currentLocale, this.getPreferredVoiceName(persistenceManager));
  }

  /**
   * Load available voices into `this.voices` (English first, then by language tag).
   *
   * Resolves immediately when voices are already available; otherwise waits for
   * the next `voiceschanged` event or one second, whichever comes first. The
   * promise resolves even when no voices were found.
   *
   * @returns {Promise<void>}
   */
  loadVoices() {
    return new Promise((resolve) => {
      const synth = speechSynthesis;
      const readVoices = () => {
        this.voices = this.sortVoices(synth.getVoices() || []);
        return this.voices.length;
      };

      if (readVoices() > 0) {
        console.log(`Browser TTS: Loaded ${this.voices.length} voices immediately`);
        resolve();
        return;
      }

      const canListen = typeof synth.addEventListener === 'function';
      let timeoutId = null;
      let settled = false;
      const onChanged = () => {
        console.log('Browser TTS: Voices changed event fired');
        finish();
      };
      // Runs exactly once, from whichever of the event or the timeout fires first.
      const finish = () => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timeoutId);
        if (canListen) {
          synth.removeEventListener('voiceschanged', onChanged);
        } else if (synth.onvoiceschanged === onChanged) {
          // Only clear the property if it is still ours, so other handlers survive.
          synth.onvoiceschanged = null;
        }
        if (readVoices() === 0) {
          console.warn('Browser TTS: No voices available from speech synthesis');
        }
        resolve();
      };

      timeoutId = setTimeout(() => {
        console.log('Browser TTS: Using timeout fallback to get voices');
        finish();
      }, 1000);
      if (canListen) {
        synth.addEventListener('voiceschanged', onChanged);
      } else {
        synth.onvoiceschanged = onChanged;
      }
    });
  }

  /**
   * Return a sorted copy of a voice list: English voices first, then ordered
   * by language tag.
   * @param {Array} voices - Voice objects with a `lang` property
   * @returns {Array} - New sorted array
   */
  sortVoices(voices) {
    const isEnglish = (voice) => this.primaryLanguage(voice.lang) === 'en';
    return [...voices].sort((a, b) => {
      const aIsEnglish = isEnglish(a);
      const bIsEnglish = isEnglish(b);
      if (aIsEnglish !== bIsEnglish) {
        return aIsEnglish ? -1 : 1;
      }
      return String(a.lang).localeCompare(String(b.lang));
    });
  }

  /**
   * Normalize a language tag for comparison: lower-case, with '_' separators
   * turned into '-' (so 'en_US' and 'en-us' compare equal).
   * @param {string} locale - Language tag, may be empty or missing
   * @returns {string} - Normalized tag, '' when nothing was given
   */
  normalizeLocale(locale) {
    return String(locale || '').trim().toLowerCase().replace(/_/g, '-');
  }

  /**
   * Extract the primary language subtag of a language tag.
   * @param {string} locale - Language tag (e.g. 'en-US')
   * @returns {string} - Primary subtag (e.g. 'en')
   */
  primaryLanguage(locale) {
    return this.normalizeLocale(locale).split('-')[0];
  }

  /**
   * Set voice based on locale.
   *
   * Order of preference: the preferred voice (by name or voiceURI), a voice
   * whose tag equals the locale, a voice with the same primary language, an
   * English voice, and finally the first voice in the list. Leaves the current
   * voice untouched when no voices are loaded.
   *
   * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
   * @param {string} preferredVoice - Optional preferred voice name
   * @returns {Promise<void>}
   */
  async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
    const normalizedLocale = this.normalizeLocale(locale) || 'en-us';
    const languageCode = normalizedLocale.split('-')[0];
    console.log(`Browser TTS: Selecting voice for locale ${normalizedLocale}, preferred voice: ${preferredVoice || 'none'}`);

    const choose = (voice, reason) => {
      this.voiceOptions.voice = voice;
      console.log(`Browser TTS: Using ${reason}: ${voice.name}`);
    };

    if (preferredVoice) {
      const matchingVoice = this.voices.find((voice) =>
        voice.name === preferredVoice || voice.voiceURI === preferredVoice
      );
      if (matchingVoice) {
        choose(matchingVoice, 'preferred voice');
        return;
      }
    }

    const exactMatch = this.voices.find((voice) => this.normalizeLocale(voice.lang) === normalizedLocale);
    if (exactMatch) {
      choose(exactMatch, `exact locale match for ${normalizedLocale}`);
      return;
    }

    // Compare whole subtags: a prefix test would let 'fi' match 'fil'.
    const languageMatch = this.voices.find((voice) => this.primaryLanguage(voice.lang) === languageCode);
    if (languageMatch) {
      choose(languageMatch, `language match for ${languageCode}`);
      return;
    }

    if (languageCode !== 'en') {
      const englishVoice = this.voices.find((voice) => this.primaryLanguage(voice.lang) === 'en');
      if (englishVoice) {
        choose(englishVoice, `English fallback (no ${languageCode} voice found)`);
        return;
      }
    }

    if (this.voices.length > 0) {
      choose(this.voices[0], 'first available voice (no matching voice found)');
    } else {
      console.log('Browser TTS: No voices available');
    }
  }

  /**
   * Build an utterance for `text` carrying the current voice options. When no
   * voice is selected the voice is left unset so the browser uses its default.
   * @param {string} text - Text to speak
   * @returns {SpeechSynthesisUtterance}
   */
  createUtterance(text) {
    const utterance = new SpeechSynthesisUtterance(text);
    const { voice, rate, pitch, volume } = this.voiceOptions;
    if (voice) {
      utterance.voice = voice;
      utterance.lang = voice.lang;
    }
    utterance.rate = rate;
    utterance.pitch = pitch;
    utterance.volume = volume;
    return utterance;
  }

  /**
   * Preload speech for a text.
   *
   * This only prepares the text and stays silent; the utterance itself is
   * created in `speakPreloaded` so it uses the voice options current at
   * playback time.
   * NOTE(review): the returned object no longer carries `audio`/`blob`; confirm
   * no cache consumer reads those fields directly.
   *
   * @param {string} text - Text to preload
   * @returns {Promise<Object|null>} - `{ text }` for `speakPreloaded`, or null
   */
  async preloadSpeech(text) {
    if (!this.available || !text) {
      return null;
    }
    try {
      const processedText = this.preprocessText(text);
      if (!processedText) {
        return null;
      }
      console.log(`Browser TTS: Preloading speech for: "${processedText.substring(0, 50)}${processedText.length > 50 ? '...' : ''}"`);
      return { text: processedText };
    } catch (error) {
      console.warn('Browser TTS: Error preloading speech:', error);
      return null;
    }
  }

  /**
   * Speak `text` through speech synthesis while recording a MediaRecorder
   * stream, and resolve with the recorded blob.
   *
   * WARNING: the utterance is spoken ALOUD, and nothing in this method connects
   * an audio source to the recording destination, so the blob is not expected
   * to contain the spoken audio. Kept for interface compatibility only; `speak`
   * and `preloadSpeech` do not use it.
   *
   * @param {string} text - Text to synthesize
   * @returns {Promise<Object>} - Object with the recorded `blob`
   */
  synthesizeToWav(text) {
    return new Promise((resolve, reject) => {
      let audioContext = null;
      let startTimeout = null;
      const releaseResources = () => {
        clearTimeout(startTimeout);
        if (audioContext && audioContext.state !== 'closed') {
          const closing = audioContext.close();
          if (closing && typeof closing.catch === 'function') {
            // Best-effort cleanup: a failed close must not mask the real outcome.
            closing.catch(() => {});
          }
        }
      };
      try {
        const utterance = this.createUtterance(text);
        const AudioContextClass = window.AudioContext || window.webkitAudioContext;
        audioContext = new AudioContextClass();
        const destination = audioContext.createMediaStreamDestination();
        const mediaRecorder = new MediaRecorder(destination.stream);
        const audioChunks = [];

        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0) {
            audioChunks.push(event.data);
          }
        };
        mediaRecorder.onstop = () => {
          releaseResources();
          // Label the blob with the recorder's real container type when known.
          resolve({ blob: new Blob(audioChunks, { type: mediaRecorder.mimeType || 'audio/wav' }) });
        };

        utterance.onstart = () => {
          clearTimeout(startTimeout);
          console.log('Browser TTS: Started synthesizing audio to WAV');
          mediaRecorder.start();
        };
        utterance.onend = () => {
          console.log('Browser TTS: Finished synthesizing audio to WAV');
          if (mediaRecorder.state !== 'inactive') {
            mediaRecorder.stop();
          } else {
            // stop() on an idle recorder throws; settle the promise instead.
            releaseResources();
            reject(new Error('Synthesis ended before recording started'));
          }
        };
        utterance.onerror = (error) => {
          console.error('Browser TTS: Error synthesizing audio:', error);
          releaseResources();
          reject(error);
        };

        // Armed before speak() so that onstart can always clear it.
        startTimeout = setTimeout(() => {
          if (mediaRecorder.state === 'inactive') {
            console.warn('Browser TTS: Synthesis to WAV timed out');
            releaseResources();
            reject(new Error('Synthesis timed out'));
          }
        }, 5000);
        speechSynthesis.speak(utterance);
      } catch (error) {
        console.error('Browser TTS: Error setting up WAV synthesis:', error);
        releaseResources();
        reject(error);
      }
    });
  }

  /**
   * Stop any current speech, then speak `text` and wire up completion handling.
   * Dispatches 'tts:speak:start' before handing the utterance to the browser,
   * and 'tts:speak:end' / 'tts:speak:error' when the utterance finishes.
   * @param {string} text - Already preprocessed text
   * @param {Function|null} callback - Receives `{ success, reason?, error? }`
   */
  startSpeaking(text, callback) {
    this.stop();
    const utterance = this.createUtterance(text);
    const release = () => {
      if (this.currentUtterance === utterance) {
        this.currentUtterance = null;
      }
    };
    utterance.onend = () => {
      release();
      this.dispatchEvent('tts:speak:end', { text });
      if (callback) {
        callback({ success: true });
      }
    };
    utterance.onerror = (event) => {
      release();
      this.dispatchEvent('tts:speak:error', {
        text,
        error: (event && event.error) || 'Unknown error'
      });
      if (callback) {
        callback({ success: false, reason: 'synthesis_error', error: event });
      }
    };
    // Keeping a reference lets stop() cancel it later.
    this.currentUtterance = utterance;
    this.dispatchEvent('tts:speak:start', { text });
    speechSynthesis.speak(utterance);
  }

  /**
   * Report a failure to start speaking: dispatch 'tts:speak:error' and invoke
   * the callback asynchronously.
   * @param {string} text - Text that could not be spoken
   * @param {Error} error - The error that was thrown
   * @param {string} reason - Reason code passed to the callback
   * @param {Function|null} callback - Completion callback
   */
  reportSpeechFailure(text, error, reason, callback) {
    this.currentUtterance = null;
    this.dispatchEvent('tts:speak:error', {
      text,
      error: (error && error.message) || 'Unknown error'
    });
    if (callback) {
      setTimeout(() => callback({ success: false, reason, error }), 0);
    }
  }

  /**
   * Speak text prepared by `preloadSpeech`
   * @param {Object} preloadData - Preloaded speech data (must carry `text`)
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - True when speech was started
   */
  speakPreloaded(preloadData, callback = null) {
    if (!this.available || !preloadData || !preloadData.text) {
      if (callback) {
        setTimeout(() => callback({ success: false, reason: 'no_preloaded_data' }), 0);
      }
      return false;
    }
    try {
      this.startSpeaking(preloadData.text, callback);
      return true;
    } catch (error) {
      console.error('Browser TTS: Error playing preloaded speech:', error);
      this.reportSpeechFailure(preloadData.text, error, 'audio_error', callback);
      return false;
    }
  }

  /**
   * Speak text
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes
   * @returns {Promise<boolean>} - Resolves true when speech was started
   */
  async speak(text, callback = null) {
    const processedText = this.available ? this.preprocessText(text) : '';
    if (!processedText) {
      if (callback) {
        setTimeout(() => callback({ success: false, reason: 'not_available' }), 0);
      }
      return false;
    }
    try {
      this.startSpeaking(processedText, callback);
      return true;
    } catch (error) {
      console.error('Browser TTS: Error speaking:', error);
      this.reportSpeechFailure(processedText, error, 'synthesis_error', callback);
      return false;
    }
  }

  /**
   * Preprocess text for TTS: collapse whitespace and make sure the text ends
   * with sentence punctuation (optionally followed by closing quotes/brackets).
   * @param {string} text - Text to preprocess
   * @returns {string} - Processed text, '' for empty or whitespace-only input
   */
  preprocessText(text) {
    if (!text) return '';
    const raw = typeof text === 'string' ? text : String(text);
    const processed = raw.trim().replace(/\s+/g, ' ');
    if (!processed) return '';
    // `"Run!"` already ends a sentence; do not turn it into `"Run!".`
    if (/[.!?…。！？]["'”’»)\]]*$/.test(processed)) {
      return processed;
    }
    return `${processed}.`;
  }

  /**
   * Stop speaking. The stopped utterance's handlers are detached first, so
   * stopping does not invoke the completion callback or dispatch an error.
   */
  stop() {
    const current = this.currentUtterance;
    if (!current) {
      return;
    }
    this.currentUtterance = null;
    current.onend = null;
    current.onerror = null;
    if (typeof current.stop === 'function') {
      current.stop();
    } else if (typeof current.pause === 'function') {
      current.pause();
    } else if (typeof speechSynthesis !== 'undefined') {
      speechSynthesis.cancel();
    }
  }

  /**
   * Check if TTS is available
   * @returns {boolean} - True if initialized and a voice is selected
   */
  isAvailable() {
    return this.available && this.voiceOptions.voice !== null;
  }

  /**
   * Get handler ID
   * @returns {string} - Handler ID
   */
  getId() {
    return this.id;
  }

  /**
   * Convert a voice into the plain descriptor returned by `getVoices`.
   * @param {Object} voice - Voice with `voiceURI`, `name` and `lang`
   * @returns {{id: string, name: string, lang: string, gender: string}}
   */
  describeVoice(voice) {
    return {
      id: voice.voiceURI,
      name: voice.name,
      lang: voice.lang,
      gender: this.inferVoiceGender(voice.name)
    };
  }

  /**
   * Get available voices for the current locale.
   *
   * Returns voices sharing the locale's primary language; when there are none,
   * English voices; when there are none of those either, every loaded voice.
   *
   * @returns {Array} - Array of `{ id, name, lang, gender }` descriptors
   */
  getVoices() {
    const localization = this.getModule('localization');
    const currentLocale = localization ? localization.getLocale() : 'en-us';
    const languageCode = this.primaryLanguage(currentLocale) || 'en';
    const voicesFor = (code) => this.voices.filter((voice) => this.primaryLanguage(voice.lang) === code);

    let selected = voicesFor(languageCode);
    if (selected.length === 0 && languageCode !== 'en') {
      selected = voicesFor('en');
    }
    if (selected.length === 0) {
      selected = this.voices;
    }
    return selected.map((voice) => this.describeVoice(voice));
  }

  /**
   * Infer voice gender from name.
   *
   * Matches whole words only, so "Female" is not read as "male" and "German"
   * is not read as "man". Explicit gender terms are checked before first names.
   *
   * @param {string} name - Voice name
   * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
   */
  inferVoiceGender(name) {
    const words = new Set(
      String(name || '').toLowerCase().split(/[^\p{L}]+/u).filter(Boolean)
    );
    // Checked in order; the first group with a matching word wins.
    const hints = [
      ['male', ['male', 'man', 'guy', 'boy', 'mr', 'sir', 'him', 'his']],
      ['female', ['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss', 'her', 'hers']],
      ['male', ['david', 'james', 'john', 'paul', 'mark', 'thomas', 'daniel', 'jack', 'william',
        'george', 'michael', 'robert', 'peter', 'brian', 'richard', 'steve', 'bruce']],
      ['female', ['mary', 'sarah', 'emma', 'susan', 'julia', 'karen', 'lisa', 'anna', 'laura', 'amy',
        'elizabeth', 'jennifer', 'maria', 'emily', 'jessica', 'alice', 'victoria']]
    ];
    for (const [gender, candidates] of hints) {
      if (candidates.some((candidate) => words.has(candidate))) {
        return gender;
      }
    }
    return 'unknown';
  }

  /**
   * Set voice options. Unknown voices and non-finite numbers are ignored,
   * leaving the corresponding option unchanged.
   * @param {Object} options - Voice options
   * @param {string} [options.voice] - Voice name or voiceURI
   * @param {number} [options.rate] - Clamped to 0.1..10
   * @param {number} [options.pitch] - Clamped to 0..2
   * @param {number} [options.volume] - Clamped to 0..1
   */
  setVoiceOptions(options = {}) {
    if (!options) {
      return;
    }
    const clamp = (value, min, max) => Math.max(min, Math.min(max, value));
    if (options.voice) {
      const voice = this.voices.find((candidate) =>
        candidate.voiceURI === options.voice || candidate.name === options.voice
      );
      if (voice) {
        this.voiceOptions.voice = voice;
      }
    }
    // Number.isFinite rejects NaN, which `typeof x === 'number'` lets through.
    if (Number.isFinite(options.rate)) {
      this.voiceOptions.rate = clamp(options.rate, 0.1, 10);
    }
    if (Number.isFinite(options.pitch)) {
      this.voiceOptions.pitch = clamp(options.pitch, 0, 2);
    }
    if (Number.isFinite(options.volume)) {
      this.voiceOptions.volume = clamp(options.volume, 0, 1);
    }
  }
}