Fix Kokoro TTS integration issues: Remove API key requirement and ensure system-specific options display correctly

This commit is contained in:
2025-04-05 22:06:22 +00:00
parent e5a3016846
commit fc693ae695
11 changed files with 3296 additions and 596 deletions
+393
View File
@@ -0,0 +1,393 @@
/**
* API TTS Module Base Class
* Base class for API-based TTS modules
*/
import { TTSHandlerModule } from './tts-handler-module.js';
/**
 * Shared base class for TTS modules that obtain audio from a remote HTTP API.
 *
 * It owns the API key / base URL configuration (loaded from and saved to the
 * 'tts' preference group under `${id}_api_key` and `${id}_api_url`), voice
 * selection, and playback of the generated audio through an `Audio` element.
 * Subclasses are expected to override `getDefaultApiBaseUrl`, `loadVoices`,
 * `selectVoiceForLocale` and `generateSpeechAudio`.
 */
export class ApiTTSModuleBase extends TTSHandlerModule {
  /**
   * @param {string} id - Module id; also used as the prefix of preference keys
   * @param {string} name - Human readable name used in log and progress messages
   */
  constructor(id, name) {
    super(id, name);

    // Basic voice options
    this.voiceOptions = {
      speed: 1.0,
      voice: null
    };

    // API settings
    this.apiKey = '';
    this.apiBaseUrl = '';

    // The Audio element that is currently playing, or null when idle
    this.currentAudio = null;

    // Bind additional methods so they can be passed around as callbacks
    this.bindMethods([
      'handleApiKeyChanged',
      'handleApiUrlChanged',
      'speakPreloaded',
      'loadVoices',
      'selectVoiceForLocale',
      'selectDefaultVoice',
      'generateSpeechAudio',
      'preprocessText'
    ]);
  }

  /**
   * Initialize the API TTS module: load the API configuration from the
   * preferences, subscribe to configuration change events, load the voices
   * and pick the active voice.
   *
   * The module resolves with `true` even when no API key is stored; in that
   * case `isReady` stays `false` until a key arrives via 'tts:api:keyChanged'.
   *
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    this.reportProgress(10, `Initializing ${this.name}`);

    // Initialize parent
    const parentInit = await super.initialize();
    if (!parentInit) {
      return false;
    }

    // Get persistence manager
    const persistenceManager = this.getModule('persistence-manager');
    if (!persistenceManager) {
      console.error(`${this.name}: Required dependency 'persistence-manager' not found`);
      return false;
    }

    // Load API key from preferences
    this.apiKey = persistenceManager.getPreference('tts', `${this.id}_api_key`) || '';

    // Set up API base URL from preferences or fall back to the provider default
    const defaultApiUrl = this.getDefaultApiBaseUrl();
    const savedApiUrl = persistenceManager.getPreference('tts', `${this.id}_api_url`);
    this.apiBaseUrl = savedApiUrl || defaultApiUrl;

    // If no API URL was saved in preferences, save the default
    if (!savedApiUrl && defaultApiUrl) {
      persistenceManager.updatePreference('tts', `${this.id}_api_url`, defaultApiUrl);
    }
    this.reportProgress(30, `${this.name} API configuration loaded`);

    // Set up event listeners for API key and URL changes
    document.addEventListener('tts:api:keyChanged', this.handleApiKeyChanged);
    document.addEventListener('tts:api:urlChanged', this.handleApiUrlChanged);

    // Load voices
    await this.loadVoices();
    this.reportProgress(50, `${this.name} voices loaded`);

    // Set up voice from preferences
    await this.setupVoiceFromPreferences();
    this.reportProgress(70, `${this.name} voice preferences configured`);

    // The module can only speak once an API key is known
    this.isReady = !!this.apiKey;

    // Initialization still succeeds without a key so the module stays
    // available for UI configuration purposes.
    this.reportProgress(100, `${this.name} initialization complete`);
    return true;
  }

  /**
   * Get the default API base URL for this provider
   * @returns {string} - Default API base URL (empty in the base class)
   */
  getDefaultApiBaseUrl() {
    // To be implemented by subclasses
    return '';
  }

  /**
   * Set up voice based on preferences and locale.
   * Order of precedence: stored voice id (`${id}_voice`), voice for the
   * current locale, default voice.
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async setupVoiceFromPreferences() {
    const persistenceManager = this.getModule('persistence-manager');
    const localization = this.getModule('localization');
    if (!persistenceManager || !localization) {
      return false;
    }

    // Get preferred voice ID from preferences
    const preferredVoiceId = persistenceManager.getPreference('tts', `${this.id}_voice`, '');

    // Get current locale
    const currentLocale = localization.getLocale();

    // If we have a preferred voice and available voices, use it
    if (preferredVoiceId && this.voices && this.voices.length > 0) {
      const voice = this.voices.find(v => v.id === preferredVoiceId);
      if (voice) {
        this.voiceOptions.voice = voice;
        return true;
      }
    }

    // Otherwise select a voice based on locale
    if (currentLocale) {
      return this.selectVoiceForLocale(currentLocale);
    }

    // Fall back to default voice
    return this.selectDefaultVoice();
  }

  /**
   * Load available voices from API
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async loadVoices() {
    // To be implemented by subclasses
    this.voices = [];
    return true;
  }

  /**
   * Select a voice for the given locale
   * @param {string} locale - Locale code
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    // To be implemented by subclasses
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice (the first loaded voice)
   * @returns {boolean} - Success status; false when no voice is loaded
   */
  selectDefaultVoice() {
    if (this.voices && this.voices.length > 0) {
      this.voiceOptions.voice = this.voices[0];
      return true;
    }
    return false;
  }

  /**
   * Generate speech audio for the given text using the API.
   * @param {string} text - The text to synthesize.
   * @returns {Promise<Object>} - Resolves with `{ success, audioData?, reason? }`.
   */
  async generateSpeechAudio(text) {
    // To be implemented by subclasses
    return { success: false, reason: 'not_implemented' };
  }

  /**
   * Speak preloaded audio data
   * @param {Object} preloadData - Preloaded audio data (`audioData` is required)
   * @param {Function} callback - Called exactly once with the playback outcome
   * @returns {boolean} - false when the data is invalid, true once playback was started
   */
  speakPreloaded(preloadData, callback = null) {
    if (!preloadData || !preloadData.audioData) {
      if (callback) {
        callback({ success: false, reason: 'invalid_data' });
      }
      return false;
    }

    // Stop any ongoing speech
    this.stop();

    // Wrap the raw bytes in an object URL the Audio element can play
    const audioBlob = new Blob([preloadData.audioData], { type: 'audio/mp3' });
    const audioUrl = URL.createObjectURL(audioBlob);
    const audio = new Audio(audioUrl);

    // An element can report both an 'error' event and a rejected play()
    // promise for the same failure, so the outcome is delivered only once.
    let settled = false;
    const finish = (result) => {
      if (settled) {
        return;
      }
      settled = true;
      // Only touch the shared state while this element is still the active
      // one; a handler of superseded audio must not mark newer audio as idle.
      if (this.currentAudio === audio) {
        this.currentAudio = null;
        this.isSpeaking = false;
      }
      URL.revokeObjectURL(audioUrl);
      if (callback) {
        callback(result);
      }
    };

    audio.onended = () => finish({ success: true });
    audio.onerror = (error) => finish({ success: false, reason: 'playback_error', error });

    // Start playback
    this.currentAudio = audio;
    this.isSpeaking = true;
    audio.play().catch((error) => finish({ success: false, reason: 'playback_error', error }));
    return true;
  }

  /**
   * Stop speaking
   * @returns {boolean} - Success status (true as well when nothing was playing)
   */
  stop() {
    const audio = this.currentAudio;
    if (!audio) {
      return true;
    }

    // Reset the state first so a failing pause() cannot leave the module
    // permanently flagged as speaking.
    this.currentAudio = null;
    this.isSpeaking = false;

    try {
      audio.pause();
      audio.currentTime = 0;
      // 'ended' never fires for interrupted audio, so release the object URL here
      if (typeof audio.src === 'string' && audio.src.startsWith('blob:')) {
        URL.revokeObjectURL(audio.src);
      }
      return true;
    } catch (error) {
      console.error(`${this.name}: Error stopping speech:`, error);
      return false;
    }
  }

  /**
   * Speak text: generate the audio via the API, then play it.
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes or fails
   * @returns {boolean} - false when the module is not ready, true once generation was started
   */
  speak(text, callback = null) {
    if (!this.isReady) {
      if (callback) {
        callback({ success: false, reason: 'not_ready' });
      }
      return false;
    }

    // Generate and play speech
    this.generateSpeechAudio(text).then(result => {
      if (result && result.success && result.audioData) {
        this.speakPreloaded({ audioData: result.audioData }, callback);
      } else if (callback) {
        callback({ success: false, reason: 'generation_failed' });
      }
    }).catch(error => {
      if (callback) {
        callback({ success: false, reason: 'generation_error', error });
      }
    });
    return true;
  }

  /**
   * Preload speech for later playback
   * @param {string} text - Text to preload
   * @returns {Promise<Object>} - Preloaded speech data (`{ success, audioData, text, duration }`)
   */
  async preloadSpeech(text) {
    if (!this.isReady) {
      return { success: false, reason: 'not_ready' };
    }

    try {
      // Generate speech
      const result = await this.generateSpeechAudio(text);
      if (!result || !result.success) {
        return { success: false, reason: 'generation_failed' };
      }
      return {
        success: true,
        audioData: result.audioData,
        text,
        duration: result.duration || 0
      };
    } catch (error) {
      return { success: false, reason: 'generation_error', error };
    }
  }

  /**
   * Preprocess text for TTS: strip markup, decode a few common HTML entities,
   * spell out '&', normalize whitespace and terminate the sentence.
   * @param {string} text - Text to preprocess
   * @returns {string} - Processed text; empty when nothing speakable remains
   */
  preprocessText(text) {
    if (!text) {
      return '';
    }

    // Remove HTML tags
    let processed = String(text).replace(/<[^>]*>/g, ' ');

    // Decode common entities first, otherwise "&amp;" would be spoken as "and amp;"
    const entities = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", '#39': "'" };
    processed = processed.replace(
      /&(nbsp|amp|lt|gt|quot|apos|#39);/gi,
      (match, entityName) => entities[entityName.toLowerCase()]
    );

    // Replace special characters
    processed = processed.replace(/&/g, ' and ');

    // Normalize whitespace
    processed = processed.replace(/\s+/g, ' ').trim();

    // Markup-only or whitespace-only input has nothing to speak
    if (!processed) {
      return '';
    }

    // Add trailing period if missing
    if (!/[.!?]$/.test(processed)) {
      processed += '.';
    }
    return processed;
  }

  /**
   * Handle API key change event ('tts:api:keyChanged').
   * Events addressed to another provider are ignored.
   * @param {Event} event - Event whose `detail` carries `provider` and `key`
   */
  handleApiKeyChanged(event) {
    if (!event || !event.detail || event.detail.provider !== this.id) {
      return;
    }

    // Anything that is not a string is treated as "no key"
    const rawKey = event.detail.key;
    const newKey = typeof rawKey === 'string' ? rawKey.trim() : '';

    // Security check - never use a URL as an API key
    if (newKey && newKey.startsWith('http')) {
      console.error(`${this.name}: Received URL instead of API key, ignoring it`);
      return;
    }

    // Update API key
    this.apiKey = newKey;

    // Save to preferences
    const persistenceManager = this.getModule('persistence-manager');
    if (persistenceManager) {
      persistenceManager.updatePreference('tts', `${this.id}_api_key`, newKey);
    }

    // Update ready state
    this.isReady = !!this.apiKey;
  }

  /**
   * Handle API URL change event ('tts:api:urlChanged').
   * An empty URL resets the module to the provider default.
   * @param {Event} event - Event whose `detail` carries `provider` and `url`
   */
  handleApiUrlChanged(event) {
    if (!event || !event.detail || event.detail.provider !== this.id) {
      return;
    }

    const newUrl = event.detail.url || this.getDefaultApiBaseUrl();

    // Update API URL
    this.apiBaseUrl = newUrl;

    // Save to preferences
    const persistenceManager = this.getModule('persistence-manager');
    if (persistenceManager) {
      persistenceManager.updatePreference('tts', `${this.id}_api_url`, newUrl);
    }
  }
}
+570
View File
@@ -0,0 +1,570 @@
/**
* BrowserTTSModule for AI Interactive Fiction
* Implementation using the browser's Web Speech API
*/
import { TTSHandlerModule } from './tts-handler-module.js';
/**
 * Normalize a locale / BCP-47 style tag for comparison: lower case with '-'
 * as separator. Falls back to 'en-us' for empty input.
 * @param {string} tag - Locale tag such as 'en_US' or 'de'
 * @returns {string} - Normalized tag such as 'en-us'
 */
function normalizeBrowserTtsLocale(tag) {
  return String(tag || 'en-us').toLowerCase().replace(/_/g, '-');
}

/**
 * Check whether a voice language tag belongs to a language. The primary
 * subtag is compared as a whole, so 'fi' does not match 'fil-PH'.
 * @param {string} voiceLang - Language tag reported by the voice
 * @param {string} languageCode - Normalized primary language subtag
 * @returns {boolean} - true when the voice speaks the language
 */
function browserTtsVoiceSpeaks(voiceLang, languageCode) {
  const normalized = normalizeBrowserTtsLocale(voiceLang || 'und');
  return normalized === languageCode || normalized.startsWith(`${languageCode}-`);
}

/**
 * Browser TTS Module - Uses the browser's Web Speech API for TTS
 */
export class BrowserTTSModule extends TTSHandlerModule {
  constructor() {
    super('browser', 'Browser TTS');

    // Voice options
    this.voiceOptions = {
      voice: null, // Will be set during initialization
      rate: 1.0,
      pitch: 1.0,
      volume: 1.0
    };

    // State
    this.available = false;
    this.currentUtterance = null;

    // Bind additional methods beyond those in TTSHandlerModule
    this.bindMethods([
      'onVoicesChanged',
      'loadVoices',
      'selectVoiceForLocale',
      'synthesizeToWav',
      'speakPreloaded',
      'speak',
      'preprocessText',
      'inferVoiceGender'
    ]);
  }

  /**
   * Initialize the browser TTS module: verify Web Speech support, load the
   * voices, apply rate/pitch/volume/voice preferences and subscribe to
   * locale and voice list changes.
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing Browser TTS');

      // Check for browser support
      if (!window.speechSynthesis) {
        console.error('Browser TTS: Speech synthesis not available in this browser');
        return false;
      }
      this.reportProgress(30, 'Browser TTS supported');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('Browser TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('Browser TTS: Required dependency persistence-manager not found');
        return false;
      }
      const localization = this.getModule('localization');
      if (!localization) {
        console.error('Browser TTS: Required dependency localization not found');
        return false;
      }

      // Load voices
      const voicesLoaded = await this.loadVoices();
      if (!voicesLoaded) {
        console.error('Browser TTS: Failed to load voices');
        return false;
      }

      // Set speech options from preferences
      this.voiceOptions.rate = persistenceManager.getPreference('tts', 'rate', 1.0);
      this.voiceOptions.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
      this.voiceOptions.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
      const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');

      // Set voice based on current locale
      const currentLocale = localization.getLocale() || 'en-us';
      await this.selectVoiceForLocale(currentLocale, preferredVoice);

      // Listen for locale changes
      document.addEventListener('locale:changed', async (event) => {
        if (event.detail && event.detail.locale) {
          await this.selectVoiceForLocale(event.detail.locale);
        }
      });

      // Listen for voices changed events (the property is absent where unsupported)
      if (window.speechSynthesis.onvoiceschanged !== undefined) {
        window.speechSynthesis.onvoiceschanged = this.onVoicesChanged;
      }

      this.isReady = true;
      this.available = true;
      this.reportProgress(100, 'Browser TTS initialized');
      return true;
    } catch (error) {
      console.error('Browser TTS: Initialization error:', error);
      this.isReady = false;
      this.available = false;
      return false;
    }
  }

  /**
   * Handle voices changed event: reload the voice list and re-select the
   * voice for the current locale / stored preference.
   */
  async onVoicesChanged() {
    await this.loadVoices();

    // Re-select voice based on current locale
    const localization = this.getModule('localization');
    const persistenceManager = this.getModule('persistence-manager');
    if (localization && persistenceManager) {
      const currentLocale = localization.getLocale() || 'en-us';
      const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');
      await this.selectVoiceForLocale(currentLocale, preferredVoice);
    }
  }

  /**
   * Wait for the browser to publish its voice list.
   * The `onvoiceschanged` handler that was installed before the wait is put
   * back afterwards, so waiting never disconnects `onVoicesChanged`.
   * @param {number} timeoutMs - Maximum time to wait
   * @returns {Promise<Array>} - The loaded voices, or [] after the timeout
   */
  waitForVoices(timeoutMs = 3000) {
    const synth = window.speechSynthesis;
    return new Promise((resolve) => {
      const previousHandler = synth.onvoiceschanged;
      let timer = null;

      const finish = (voices) => {
        clearTimeout(timer);
        if (synth.onvoiceschanged === onChanged) {
          synth.onvoiceschanged = previousHandler || null;
        }
        resolve(voices);
      };
      const onChanged = () => {
        const loadedVoices = synth.getVoices();
        console.log(`Browser TTS: Voices loaded, found ${loadedVoices.length} voices`);
        finish(loadedVoices);
      };

      // Resolve with an empty list instead of rejecting when voices never load
      timer = setTimeout(() => {
        console.warn('Browser TTS: Timeout waiting for voices');
        finish([]);
      }, timeoutMs);
      synth.onvoiceschanged = onChanged;
    });
  }

  /**
   * Load available voices from the speech synthesis API
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async loadVoices() {
    try {
      this.reportProgress(40, 'Loading browser voices');

      // Try to get voices
      let voices = window.speechSynthesis.getVoices();

      // Some browsers populate the list asynchronously
      if (!voices || voices.length === 0) {
        console.log('Browser TTS: No voices available immediately, waiting for voices to load...');
        voices = await this.waitForVoices(3000);
      }

      // Store voices
      this.voices = voices || [];

      // Log available voices for debugging
      console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
      if (this.voices.length > 0) {
        console.log('Browser TTS: First few voices:', this.voices.slice(0, 3));
      }

      // If no voices available but speech synthesis is supported, still return true
      // Some browsers may not expose voices but still support speech synthesis
      if (this.voices.length === 0) {
        console.warn('Browser TTS: No voices available, but continuing with default voice');
        // Placeholder entry. It is NOT a SpeechSynthesisVoice, so it is flagged
        // and never assigned to an utterance (see applyVoiceOptions).
        this.voices = [{
          default: true,
          lang: 'en-US',
          localService: true,
          name: 'Default Voice',
          voiceURI: 'default',
          isPlaceholder: true
        }];
      }

      this.reportProgress(60, 'Browser voices loaded');
      return true;
    } catch (error) {
      console.error('Browser TTS: Error loading voices:', error);
      return false;
    }
  }

  /**
   * Set voice based on locale.
   * Precedence: preferred voice (by name or voiceURI), exact locale match,
   * same-language match, first available voice.
   * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
   * @param {string} preferredVoice - Optional preferred voice name
   * @returns {Promise<boolean>} - Success status
   */
  async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
    const voices = this.voices || [];

    // Normalize locale format (also tolerates null)
    const normalizedLocale = normalizeBrowserTtsLocale(locale);
    const languageCode = normalizedLocale.split('-')[0];

    const candidates = [
      // First try to use the preferred voice if specified
      preferredVoice
        ? voices.find(v => v.name === preferredVoice || v.voiceURI === preferredVoice)
        : undefined,
      // Then a voice that matches the exact locale
      voices.find(v => normalizeBrowserTtsLocale(v.lang || 'und') === normalizedLocale),
      // Then a voice of the same language
      voices.find(v => browserTtsVoiceSpeaks(v.lang, languageCode)),
      // Fallback to the first available voice
      voices[0]
    ];

    const selected = candidates.find(Boolean);
    if (!selected) {
      // No voices available
      return false;
    }
    this.voiceOptions.voice = selected;
    return true;
  }

  /**
   * Copy the current voice options onto an utterance.
   * @param {SpeechSynthesisUtterance} utterance - Utterance to configure
   */
  applyVoiceOptions(utterance) {
    const { voice, rate, pitch, volume } = this.voiceOptions;
    // The placeholder voice is a plain object; utterance.voice only accepts
    // real SpeechSynthesisVoice instances, so leave the browser default.
    if (voice && !voice.isPlaceholder) {
      utterance.voice = voice;
    }
    utterance.rate = rate;
    utterance.pitch = pitch;
    utterance.volume = volume;
  }

  /**
   * Speak text
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Success status
   */
  speak(text, callback = null) {
    if (!this.isReady || !window.speechSynthesis) {
      if (callback) {
        callback({ success: false, reason: 'not_ready' });
      }
      return false;
    }

    // Stop any ongoing speech
    this.stop();

    const utterance = new SpeechSynthesisUtterance(this.preprocessText(text));
    this.applyVoiceOptions(utterance);

    // cancel() reports the interrupted utterance asynchronously; only the
    // utterance that is still current may reset the speaking state.
    const releaseIfCurrent = () => {
      if (this.currentUtterance === utterance) {
        this.currentUtterance = null;
        this.isSpeaking = false;
      }
    };

    utterance.onend = () => {
      releaseIfCurrent();
      if (callback) {
        callback({ success: true });
      }
    };
    utterance.onerror = (error) => {
      releaseIfCurrent();
      console.error('Browser TTS: Speech error', error);
      if (callback) {
        callback({ success: false, reason: 'synthesis_error', error });
      }
    };

    // Store current utterance
    this.currentUtterance = utterance;
    this.isSpeaking = true;

    // Start speaking
    window.speechSynthesis.speak(utterance);
    return true;
  }

  /**
   * Preload speech for a text
   * @param {string} text - Text to preload
   * @returns {Promise<Object>} - Preloaded speech data
   */
  async preloadSpeech(text) {
    if (!this.isReady || !window.speechSynthesis) {
      return { success: false, reason: 'not_ready' };
    }

    // Generate audio data
    const wavResult = await this.synthesizeToWav(text);
    if (!wavResult.success) {
      return { success: false, reason: 'synthesis_failed' };
    }

    return {
      success: true,
      audioData: wavResult.audioData,
      text,
      duration: wavResult.duration || 0
    };
  }

  /**
   * Speak the text while recording a MediaStream destination and return the
   * recorded bytes.
   *
   * NOTE(review): nothing in this method connects the speech synthesis output
   * to the AudioContext destination, so the recording presumably contains
   * silence and the utterance is audible while "preloading" - confirm whether
   * callers actually need `audioData` (speakPreloaded only uses the text).
   *
   * @param {string} text - Text to synthesize
   * @returns {Promise<Object>} - Always resolves: `{ success, audioData?, reason? }`
   */
  async synthesizeToWav(text) {
    return new Promise((resolve) => {
      if (!this.isReady || !window.speechSynthesis) {
        resolve({ success: false, reason: 'not_ready' });
        return;
      }

      // Process text for better synthesis
      const processedText = this.preprocessText(text);

      const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
      if (!AudioContextCtor) {
        resolve({ success: false, reason: 'no_audio_context' });
        return;
      }
      if (typeof MediaRecorder === 'undefined') {
        resolve({ success: false, reason: 'no_media_recorder' });
        return;
      }

      let audioContext = null;
      let mediaRecorder = null;
      let timeoutId = null;
      let settled = false;

      // Free the timer and the audio context; closing is best effort.
      const releaseResources = () => {
        clearTimeout(timeoutId);
        if (audioContext && audioContext.state !== 'closed') {
          try {
            const closing = audioContext.close();
            if (closing && typeof closing.catch === 'function') {
              closing.catch((closeError) => {
                console.warn('Browser TTS: Failed to close audio context', closeError);
              });
            }
          } catch (closeError) {
            console.warn('Browser TTS: Failed to close audio context', closeError);
          }
        }
      };
      const settle = (result) => {
        if (settled) {
          return;
        }
        settled = true;
        releaseResources();
        resolve(result);
      };
      // stop() throws on an inactive recorder (e.g. onend after the timeout)
      const stopRecorder = () => {
        if (mediaRecorder && mediaRecorder.state !== 'inactive') {
          mediaRecorder.stop();
        }
      };

      try {
        audioContext = new AudioContextCtor();
        const destination = audioContext.createMediaStreamDestination();
        mediaRecorder = new MediaRecorder(destination.stream);
      } catch (setupError) {
        console.error('Browser TTS: Unable to set up audio capture', setupError);
        settle({ success: false, reason: 'recorder_error' });
        return;
      }

      const audioChunks = [];
      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          audioChunks.push(event.data);
        }
      };
      mediaRecorder.onstop = () => {
        if (settled) {
          // A synthesis error already produced the result
          return;
        }
        // Create blob from chunks and convert it to an array buffer
        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
        const reader = new FileReader();
        reader.onloadend = () => settle({ success: true, audioData: reader.result });
        reader.onerror = () => settle({ success: false, reason: 'blob_read_error' });
        reader.readAsArrayBuffer(audioBlob);
      };

      // Create utterance
      const utterance = new SpeechSynthesisUtterance(processedText);
      this.applyVoiceOptions(utterance);

      // Set up completion handling
      utterance.onend = () => {
        stopRecorder();
      };
      utterance.onerror = (error) => {
        console.error('Browser TTS: Synthesis error', error);
        stopRecorder();
        settle({ success: false, reason: 'synthesis_error' });
      };

      try {
        // Start recording, then start speaking
        mediaRecorder.start();
        window.speechSynthesis.speak(utterance);
      } catch (startError) {
        console.error('Browser TTS: Unable to start synthesis', startError);
        settle({ success: false, reason: 'synthesis_error' });
        return;
      }

      // Stop the recording in case onend never fires
      timeoutId = setTimeout(stopRecorder, 30000); // 30-second timeout
    });
  }

  /**
   * Speak preloaded audio data
   * @param {Object} preloadedData - Data from preloadSpeech
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Success status
   */
  speakPreloaded(preloadedData, callback = null) {
    if (!preloadedData || !preloadedData.text) {
      console.error('Browser TTS: Invalid preloaded data');
      if (callback) {
        callback({ success: false, reason: 'invalid_data' });
      }
      return false;
    }

    // For browser TTS, we don't use the preloaded data directly
    // Instead, we just speak the text again
    return this.speak(preloadedData.text, callback);
  }

  /**
   * Preprocess text for TTS: strip markup, decode a few common HTML
   * entities, spell out '&' and normalize whitespace.
   * @param {string} text - Text to preprocess
   * @returns {string} - Processed text ('' for empty / missing input)
   */
  preprocessText(text) {
    if (!text) {
      return '';
    }

    // Remove HTML tags
    let processed = String(text).replace(/<[^>]*>/g, ' ');

    // Decode common entities first, otherwise "&amp;" would be spoken as "and amp;"
    const entities = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", '#39': "'" };
    processed = processed.replace(
      /&(nbsp|amp|lt|gt|quot|apos|#39);/gi,
      (match, entityName) => entities[entityName.toLowerCase()]
    );

    // Replace special characters with their spoken equivalents
    processed = processed.replace(/&/g, ' and ');

    // Normalize whitespace
    return processed.replace(/\s+/g, ' ').trim();
  }

  /**
   * Stop speaking
   * @returns {boolean} - Success status
   */
  stop() {
    if (window.speechSynthesis) {
      window.speechSynthesis.cancel();
      this.isSpeaking = false;
      this.currentUtterance = null;
      return true;
    }
    return false;
  }

  /**
   * Get available voices for the current locale's language; when no voice
   * matches, all voices are returned.
   * @returns {Promise<Array>} - Array of `{ id, name, lang, gender }` objects
   */
  async getVoices() {
    if (!this.isReady) {
      return [];
    }

    const localization = this.getModule('localization');
    const currentLocale = (localization && localization.getLocale()) || 'en-us';
    const languageCode = normalizeBrowserTtsLocale(currentLocale).split('-')[0];

    const describe = (voice) => ({
      id: voice.voiceURI,
      name: voice.name,
      lang: voice.lang,
      gender: this.inferVoiceGender(voice.name)
    });

    // Filter voices by the language of the current locale
    const voices = this.voices || [];
    const matching = voices.filter(voice => browserTtsVoiceSpeaks(voice.lang, languageCode));
    return (matching.length > 0 ? matching : voices).map(describe);
  }

  /**
   * Infer voice gender from name. Whole words are compared, because plain
   * substring checks classify "Female" as male and "Samsung" as female.
   * @param {string} name - Voice name
   * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
   */
  inferVoiceGender(name) {
    const words = String(name || '').toLowerCase().split(/[^a-z]+/).filter(Boolean);

    // Common terms indicating gender
    const maleTerms = ['male', 'man', 'guy', 'boy', 'mr', 'sir'];
    const femaleTerms = ['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss'];

    if (words.some(word => maleTerms.includes(word))) {
      return 'male';
    }
    if (words.some(word => femaleTerms.includes(word))) {
      return 'female';
    }
    return 'unknown';
  }
}
// Self-registration: runs once when this file is loaded.
// The registry is looked up on `window` (never imported directly).
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register Browser TTS Module');
} else {
  try {
    // Build the module instance before handing it to the registry
    const moduleInstance = new BrowserTTSModule();
    window.moduleRegistry.register(moduleInstance);
    console.log('Browser TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register Browser TTS Module:', registrationError);
  }
}
+270
View File
@@ -0,0 +1,270 @@
/**
* ElevenLabsTTSModule
* Provides TTS via ElevenLabs API
*/
import { ApiTTSModuleBase } from './api-tts-module-base.js';
// Voice used until a preference or the API voice list provides another one
const ELEVENLABS_DEFAULT_VOICE_ID = 'pNInz6obpgDQGcFmaJgB';

/**
 * Reduce a voice reference to its id. The base class stores whole voice
 * objects (`{ id, name, ... }`) in `voiceOptions.voice`, while the API needs
 * the plain id string.
 * @param {string|Object|null} voice - Voice id or voice object
 * @returns {string} - The voice id, or '' when it cannot be determined
 */
function resolveElevenLabsVoiceId(voice) {
  if (typeof voice === 'string') {
    return voice;
  }
  return voice && typeof voice.id === 'string' ? voice.id : '';
}

/**
 * ElevenLabs implementation of the API based TTS module.
 * Preferences live in the 'tts' group under `elevenlabs_*` keys; the older
 * 'elevenlabs' group is still read as a fallback.
 */
export class ElevenLabsTTSModule extends ApiTTSModuleBase {
  constructor() {
    super('elevenlabs', 'ElevenLabs TTS');

    // Voice options specific to ElevenLabs
    this.voiceOptions = {
      voice: ELEVENLABS_DEFAULT_VOICE_ID, // Default voice ID for ElevenLabs
      model: 'eleven_multilingual_v2', // Use the multilingual model
      speed: 1.0
    };
  }

  /**
   * Read an ElevenLabs preference, preferring the key the module writes
   * (`tts` / `elevenlabs_<key>`) over the legacy `elevenlabs` / `<key>` one.
   * @param {Object} persistenceManager - Persistence manager module
   * @param {string} key - Preference name without prefix (e.g. 'voice')
   * @param {*} fallback - Value returned when neither location holds a value
   * @returns {*} - The stored value or the fallback
   */
  readPreference(persistenceManager, key, fallback) {
    const hasValue = (value) => value !== undefined && value !== null && value !== '';
    const current = persistenceManager.getPreference('tts', `${this.id}_${key}`);
    if (hasValue(current)) {
      return current;
    }
    const legacy = persistenceManager.getPreference('elevenlabs', key);
    return hasValue(legacy) ? legacy : fallback;
  }

  /**
   * Initialize the ElevenLabs TTS module.
   * Fails (resolves with false) when no API key is configured.
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing ElevenLabs TTS');

      // Initialize parent (loads this.apiKey, the API URL and the voices)
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('ElevenLabs TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('ElevenLabs TTS: Required dependency persistence-manager not found');
        return false;
      }

      // The parent reads the key from 'tts'/'elevenlabs_api_key'. Fall back to
      // the legacy location and reload the voices with that key.
      if (!this.apiKey) {
        const legacyKey = persistenceManager.getPreference('elevenlabs', 'api_key', '');
        if (legacyKey) {
          this.apiKey = legacyKey;
          this.reportProgress(50, 'Loading ElevenLabs voices');
          await this.loadVoices(legacyKey);
        }
      }
      if (!this.apiKey) {
        console.error('ElevenLabs TTS: API key not configured');
        this.isReady = false;
        return false;
      }

      // Load preferences. The voice is always normalized to an id string.
      const savedVoice = this.readPreference(persistenceManager, 'voice', '');
      this.voiceOptions.voice = resolveElevenLabsVoiceId(savedVoice)
        || resolveElevenLabsVoiceId(this.voiceOptions.voice)
        || ELEVENLABS_DEFAULT_VOICE_ID;

      const savedModel = this.readPreference(persistenceManager, 'model', '');
      if (savedModel) {
        this.voiceOptions.model = savedModel;
      } else if (!this.voiceOptions.model) {
        this.voiceOptions.model = 'eleven_multilingual_v2';
      }

      const savedSpeed = this.readPreference(persistenceManager, 'speed', this.voiceOptions.speed);
      if (typeof savedSpeed === 'number' && Number.isFinite(savedSpeed)) {
        this.voiceOptions.speed = savedSpeed;
      }

      this.isReady = true;
      this.reportProgress(100, 'ElevenLabs TTS initialized');
      return true;
    } catch (error) {
      console.error('ElevenLabs TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Get the default API base URL for ElevenLabs
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.elevenlabs.io/v1';
  }

  /**
   * Load available voices from ElevenLabs API.
   * A built-in list is used when no key is available or the request fails.
   * @param {string} apiKey - API key for authentication (defaults to the module's key)
   * @returns {Promise<boolean>} - Always resolves with true
   */
  async loadVoices(apiKey = this.apiKey) {
    // Set default voices that will be used if API call fails.
    // NOTE(review): names follow ElevenLabs' premade voice catalogue - verify
    // against the /voices endpoint.
    this.voices = [
      { id: 'pNInz6obpgDQGcFmaJgB', name: 'Adam', language: 'en' },
      { id: '21m00Tcm4TlvDq8ikWAM', name: 'Rachel', language: 'en' },
      { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Domi', language: 'en' },
      { id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
      { id: 'ErXwobaYiN019PkySvjV', name: 'Antoni', language: 'en' }
    ];

    // Only load from API if we have an API key
    if (!apiKey) {
      return true;
    }

    try {
      const response = await fetch(`${this.apiBaseUrl}/voices`, {
        method: 'GET',
        headers: {
          'xi-api-key': apiKey,
          'Content-Type': 'application/json'
        }
      });
      if (!response.ok) {
        console.error(`ElevenLabs TTS: API error: ${response.status} ${response.statusText}`);
        return true; // Use defaults, but don't fail initialization
      }

      const data = await response.json();
      if (data && Array.isArray(data.voices)) {
        // Transform API response to our internal format
        this.voices = data.voices.map(voice => ({
          id: voice.voice_id,
          name: voice.name,
          language: 'en', // No per-voice language is read from the response
          preview: voice.preview_url
        }));
      }
    } catch (error) {
      console.error('ElevenLabs TTS: Error loading voices:', error);
    }

    // If API call failed, we still return true since we have default voices
    return true;
  }

  /**
   * Select a voice for the given locale
   * @param {string} locale - Locale code (unused: voices carry no language info)
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    // Voices are not distinguished by language here, so use the default voice
    return this.selectDefaultVoice();
  }

  /**
   * Generate speech audio data using ElevenLabs API
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` or
   *   `{ success: false, reason, error? }`
   */
  async generateSpeechAudio(text) {
    // Don't attempt to call the API if no API key is set or text is empty
    if (!text || !this.apiKey) {
      return { success: false, reason: 'missing_api_key_or_text' };
    }

    // voiceOptions.voice may hold an id or a voice object set by the base class
    const voiceId = resolveElevenLabsVoiceId(this.voiceOptions.voice);
    if (!voiceId) {
      return { success: false, reason: 'missing_voice' };
    }

    try {
      // Create request payload
      const payload = {
        text: this.preprocessText(text),
        model_id: this.voiceOptions.model || 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.0,
          use_speaker_boost: true,
          speed: this.voiceOptions.speed || 1.0
        }
      };

      // Make API request
      const baseUrl = String(this.apiBaseUrl || '').replace(/\/+$/, '');
      const endpoint = `${baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}?optimize_streaming_latency=0`;
      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': this.apiKey,
          // The base class plays the bytes as MP3, so request MPEG audio
          Accept: 'audio/mpeg'
        },
        body: JSON.stringify(payload)
      });
      if (!response.ok) {
        throw new Error(`API error: ${response.status} ${response.statusText}`);
      }

      // Array buffer for consistency with other modules
      const arrayBuffer = await response.arrayBuffer();
      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('ElevenLabs TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options and persist them in the 'tts' preference group
   * (`elevenlabs_voice`, `elevenlabs_speed`, `elevenlabs_model`).
   * @param {Object} options - Voice options
   * @param {string|Object} [options.voice] - Voice id or voice object
   * @param {number} [options.speed] - Speed, clamped to 0.5 - 2.0
   * @param {string} [options.model] - ElevenLabs model id
   */
  setVoiceOptions(options = {}) {
    const requested = options || {};
    const persist = (key, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', `elevenlabs_${key}`, value);
      }
    };

    const voiceId = resolveElevenLabsVoiceId(requested.voice);
    if (voiceId) {
      this.voiceOptions.voice = voiceId;
      persist('voice', voiceId);
    }

    if (typeof requested.speed === 'number' && Number.isFinite(requested.speed)) {
      this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, requested.speed));
      // initialize() restores the speed, so it has to be stored as well
      persist('speed', this.voiceOptions.speed);
    }

    // Handle ElevenLabs-specific options
    if (requested.model) {
      this.voiceOptions.model = requested.model;
      persist('model', requested.model);
    }
  }
}
// Self-registration: runs once when this file is loaded.
// The registry is looked up on `window` (never imported directly).
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register ElevenLabs TTS Module');
} else {
  try {
    // Build the module instance before handing it to the registry
    const moduleInstance = new ElevenLabsTTSModule();
    window.moduleRegistry.register(moduleInstance);
    console.log('ElevenLabs TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register ElevenLabs TTS Module:', registrationError);
  }
}
+1 -1
View File
@@ -425,7 +425,7 @@ export class KokoroHandler extends TTSHandler {
try { try {
const persistenceManager = this.getModule('persistence-manager'); const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) { if (persistenceManager) {
persistenceManager.setPreference('tts-voice-kokoro', foundVoice.id); persistenceManager.updatePreference('tts-voice-kokoro', foundVoice.id);
} }
} catch (error) { } catch (error) {
console.error('Kokoro TTS: Error saving voice preference:', error); console.error('Kokoro TTS: Error saving voice preference:', error);
+657
View File
@@ -0,0 +1,657 @@
/**
* KokoroTTSModule for AI Interactive Fiction
* Implementation using the Kokoro library
*/
import { TTSHandlerModule } from './tts-handler-module.js';
export class KokoroTTSModule extends TTSHandlerModule {
  /**
   * Create the module with empty generation/playback state.
   * Nothing touches the DOM until initialize() is called.
   */
  constructor() {
    super('kokoro', 'Kokoro TTS');

    // State
    this.iframe = null;
    this.currentAudio = null;
    // generation id -> { resolve, reject } of the promise returned by generateSpeech()
    this.pendingGenerations = new Map();
    this.generationCounter = 0;
    this.voices = [];
    this.lastProgressTime = null;
    this.lastProgressValue = null;
    this.modelLoaded = false;

    // Playback options read by speak()/speakPreloaded() and written by setOptions().
    // NOTE(review): no visible base class defines `options`; any value a base class
    // did provide is preserved and only missing keys receive defaults.
    this.options = { rate: 1.0, volume: 1.0, ...(this.options || {}) };

    // Bind additional methods beyond those in TTSHandlerModule
    this.bindMethods([
      'handleIframeMessage',
      'setupVoiceFromPreferences',
      'generateSpeech',
      'speakPreloaded',
      'preprocessText',
      'pause',
      'resume',
      'getDefaultVoices'
    ]);
  }

  /**
   * Initialize the Kokoro TTS module.
   *
   * Steps: verify the kokoro-js resource is reachable, load the hidden loader
   * iframe, wait for its `kokoro:ready` message, then select a voice.
   * On any failure after the iframe was created, the iframe and the message
   * listener are removed again so a retry starts from a clean state.
   *
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    // Roll back partial setup; always yields false so callers can `return abort()`.
    const abort = () => {
      window.removeEventListener('message', this.handleIframeMessage);
      if (this.iframe) {
        this.iframe.remove();
        this.iframe = null;
      }
      this.isReady = false;
      return false;
    };

    try {
      console.log('Kokoro TTS: Initializing');
      this.state = 'INITIALIZING';

      // Get dependencies
      this.reportProgress(10, 'Loading dependencies');

      // The persistence manager is required for preferences
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('Kokoro TTS: Required dependency persistence-manager not found');
        return false;
      }

      // Check that the kokoro-js.js resource exists before creating the iframe
      try {
        this.reportProgress(20, 'Checking for Kokoro TTS resources');
        const response = await fetch('/js/kokoro-js.js', { method: 'HEAD' });
        if (!response.ok) {
          console.error(`Kokoro TTS: Required resource kokoro-js.js not found (${response.status})`);
          throw new Error('Kokoro TTS resource not available');
        }
        console.log('Kokoro TTS: Resources available');
      } catch (resourceError) {
        console.error('Kokoro TTS: Error checking resources', resourceError);
        return false;
      }

      // Create iframe for Kokoro TTS
      this.reportProgress(30, 'Creating Kokoro TTS iframe');
      console.log('Kokoro TTS: Creating iframe for Kokoro loader');
      const iframe = document.createElement('iframe');
      iframe.src = '/kokoro-loader.html';
      iframe.style.display = 'none';
      document.body.appendChild(iframe);
      this.iframe = iframe;

      // Wait for iframe to load
      try {
        await new Promise((resolve, reject) => {
          iframe.onload = () => {
            console.log('Kokoro TTS: Iframe loaded successfully');
            resolve();
          };
          iframe.onerror = (error) => {
            console.error('Kokoro TTS: Iframe failed to load:', error);
            reject(new Error('Kokoro TTS: Iframe failed to load'));
          };
          iframe.onabort = () => {
            console.error('Kokoro TTS: Iframe load aborted');
            reject(new Error('Kokoro TTS: Iframe load aborted'));
          };
        });
      } catch (iframeError) {
        console.error('Kokoro TTS: Error loading iframe:', iframeError);
        return abort();
      }

      // Add message event listener for progress updates from iframe
      window.addEventListener('message', this.handleIframeMessage);

      // Wait for model to initialize
      try {
        this.reportProgress(50, 'Loading Kokoro model');
        console.log('Kokoro TTS: Waiting for model to initialize');
        await new Promise((resolve, reject) => {
          // One-time handler for the kokoro:ready message
          const readyHandler = (event) => {
            // Ignore messages from other windows, same rule as handleIframeMessage
            if (event.source !== iframe.contentWindow) {
              return;
            }
            if (!event.data || event.data.type !== 'kokoro:ready') {
              return;
            }
            window.removeEventListener('message', readyHandler);

            // Validate the success status from the event
            if (event.data.success === false) {
              console.error('Kokoro TTS: Model initialization failed:', event.data.error || 'Unknown error');
              reject(new Error('Kokoro TTS: ' + (event.data.error || 'Model initialization failed')));
              return;
            }

            console.log('Kokoro TTS: Model initialized successfully');
            this.modelLoaded = true;
            // Only an array is usable here: voice selection relies on Array#find
            const reportedVoices = event.data.voices;
            this.voices = Array.isArray(reportedVoices) && reportedVoices.length > 0
              ? reportedVoices
              : this.getDefaultVoices();
            resolve();
          };
          window.addEventListener('message', readyHandler);

          // Send initialization message to iframe
          this.reportProgress(60, 'Initializing Kokoro model');
          console.log('Kokoro TTS: Sending initialization message to iframe');
          iframe.contentWindow.postMessage({ type: 'kokoro:initialize' }, '*');
        });
      } catch (modelError) {
        console.error('Kokoro TTS: Error initializing model:', modelError);
        return abort();
      }

      // Keep the voices reported by the loader; fall back to the built-in list
      // only when none were provided.
      this.reportProgress(80, 'Loading Kokoro voices');
      if (!Array.isArray(this.voices) || this.voices.length === 0) {
        this.voices = this.getDefaultVoices();
        console.log('Kokoro TTS: Loaded default voices:', this.voices);
      } else {
        console.log('Kokoro TTS: Using voices reported by loader:', this.voices);
      }

      // Set voice based on preferences
      this.reportProgress(90, 'Setting up voice preferences');
      await this.setupVoiceFromPreferences(persistenceManager);
      console.log('Kokoro TTS: Voice preferences set up');

      this.isReady = true;
      this.reportProgress(100, 'Kokoro TTS initialized');
      console.log('Kokoro TTS: Initialization complete');
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Initialization error:', error);
      return abort();
    }
  }

  /**
   * Handle messages posted by the loader iframe (progress, ready, error,
   * finished speech generations and voice list updates).
   * Messages from any other window are ignored.
   *
   * NOTE(review): message types here use a colon ('kokoro:progress'); confirm the
   * loader page posts the same type names and a 0-100 progress scale.
   *
   * @param {MessageEvent} event - Message event
   */
  handleIframeMessage = (event) => {
    // Only process messages from our iframe
    if (!this.iframe || event.source !== this.iframe.contentWindow) {
      return;
    }

    const message = event.data;
    if (!message || !message.type) {
      return;
    }

    switch (message.type) {
      case 'kokoro:progress': {
        const progress = message.progress;
        // Non-numeric or zero progress carries no information worth reporting
        if (typeof progress === 'number' && progress) {
          // Track the last time we received a progress update
          this.lastProgressTime = Date.now();
          this.lastProgressValue = progress;
          this.modelLoadingProgress = progress;
          // Map loader progress onto the 60-90 band of overall initialization
          this.reportProgress(60 + Math.floor(progress * 0.3), `Loading Kokoro model: ${progress.toFixed(0)}%`);
        }
        break;
      }
      case 'kokoro:ready':
        this.modelLoaded = true;
        this.reportProgress(90, 'Kokoro model loaded');
        console.log('Kokoro TTS: Model ready event received');
        break;
      case 'kokoro:error':
        console.error('Kokoro TTS: Error from iframe:', message.error);
        this.state = 'ERROR';
        break;
      case 'kokoro:speech-generated':
        // Settle the promise created by generateSpeech() for this id
        if (message.id !== undefined && this.pendingGenerations.has(message.id)) {
          const resolver = this.pendingGenerations.get(message.id);
          this.pendingGenerations.delete(message.id);
          if (message.error) {
            resolver.reject(new Error(message.error));
          } else {
            resolver.resolve({
              success: true,
              audioData: message.audioData,
              duration: message.duration || 0
            });
          }
        }
        break;
      case 'kokoro:voices':
        // Update available voices and notify listeners
        if (Array.isArray(message.voices)) {
          this.voices = message.voices;
          document.dispatchEvent(new CustomEvent('tts:voices-updated', {
            detail: { engine: 'kokoro', voices: this.voices }
          }));
        }
        break;
    }
  };

  /**
   * Select the active voice: the saved preference if it is still available,
   * otherwise an exact locale match, then a language match, then the first voice.
   *
   * @param {Object} persistenceManager - Persistence manager used to read the saved voice id
   * @returns {Promise<boolean>} - Whether a voice was selected
   */
  async setupVoiceFromPreferences(persistenceManager) {
    if (!persistenceManager) {
      return false;
    }

    // Get current locale
    const localization = this.getModule('localization');
    const locale = localization ? localization.getLocale() : null;

    // Get preferred voice from preferences
    const preferredVoiceId = persistenceManager.getPreference('tts', 'kokoro_voice', '');

    let selectedVoice = null;
    if (preferredVoiceId) {
      selectedVoice = this.voices.find((voice) => voice.id === preferredVoiceId);
    }

    if (!selectedVoice) {
      // Normalize e.g. "en_US" -> "en-us"; default to US English without a locale
      const normalizedLocale = typeof locale === 'string' && locale
        ? locale.toLowerCase().replace('_', '-')
        : 'en-us';
      const languageCode = normalizedLocale.split('-')[0];

      selectedVoice =
        // Exact locale match
        this.voices.find((voice) => voice.lang && voice.lang.toLowerCase() === normalizedLocale) ||
        // Same language, any region
        this.voices.find((voice) => voice.lang && voice.lang.toLowerCase().startsWith(languageCode)) ||
        // Anything at all
        (this.voices.length > 0 ? this.voices[0] : null);
    }

    return selectedVoice ? this.setVoice(selectedVoice) : false;
  }

  /**
   * Set voice for TTS, persist it and forward it to the loader iframe.
   *
   * @param {Object|string} voice - Voice object (with an `id`) or a voice id string,
   *   matching the `setVoice(voiceId)` contract of TTSHandlerModule
   * @returns {boolean} - Success status (false for an unknown or invalid voice)
   */
  setVoice(voice) {
    let resolvedVoice = voice;
    if (typeof voice === 'string') {
      const knownVoices = Array.isArray(this.voices) && this.voices.length > 0
        ? this.voices
        : this.getDefaultVoices();
      resolvedVoice = knownVoices.find((candidate) => candidate.id === voice);
    }

    if (!resolvedVoice || !resolvedVoice.id) {
      return false;
    }

    this.currentVoice = resolvedVoice;

    // Save to preferences
    const persistenceManager = this.getModule('persistence-manager');
    if (persistenceManager) {
      persistenceManager.updatePreference('tts', 'kokoro_voice', resolvedVoice.id);
    }

    // Send message to iframe
    if (this.iframe && this.iframe.contentWindow) {
      this.iframe.contentWindow.postMessage({
        type: 'kokoro:set-voice',
        voiceId: resolvedVoice.id
      }, '*');
    }

    return true;
  }

  /**
   * Set playback options for TTS.
   *
   * @param {Object} options - Options to set; only `rate` and `volume` are used
   * @returns {boolean} - Success status (false when no options object is given)
   */
  setOptions(options) {
    if (!options) {
      return false;
    }

    // Update rate and volume if provided
    if (options.rate !== undefined) {
      this.options.rate = options.rate;
    }
    if (options.volume !== undefined) {
      this.options.volume = options.volume;
    }

    return true;
  }

  /**
   * Get available voices.
   *
   * @returns {Promise<Array>} - Loaded voices, or the built-in list when none are loaded yet
   */
  async getVoices() {
    if (!this.voices || this.voices.length === 0) {
      return this.getDefaultVoices();
    }
    return this.voices;
  }

  /**
   * Preprocess text for TTS: strip HTML tags, decode the most common HTML
   * entities, spell out "&" as "and" and collapse whitespace.
   *
   * @param {string} text - Text to preprocess; null/undefined yields an empty string
   * @returns {string} - Preprocessed text
   */
  preprocessText(text) {
    if (text === null || text === undefined) {
      return '';
    }

    return String(text)
      // Remove HTML tags
      .replace(/<[^>]*>/g, ' ')
      // Decode common entities; &amp; goes last so "&amp;lt;" is not decoded twice
      .replace(/&nbsp;/gi, ' ')
      .replace(/&lt;/gi, '<')
      .replace(/&gt;/gi, '>')
      .replace(/&quot;/gi, '"')
      .replace(/&(?:#39|apos);/gi, "'")
      .replace(/&amp;/gi, '&')
      // Replace special characters
      .replace(/&/g, ' and ')
      // Normalize whitespace
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Preload speech for later playback with speakPreloaded().
   * Never rejects: generation failures are reported through the result object.
   *
   * @param {string} text - Text to preload
   * @returns {Promise<Object>} - `{ success, audioData, text, duration }` or `{ success: false, reason }`
   */
  async preloadSpeech(text) {
    if (!this.isReady) {
      return { success: false, reason: 'not_ready' };
    }

    let result;
    try {
      result = await this.generateSpeech(text);
    } catch (error) {
      // generateSpeech() rejects when the iframe reports a generation error
      console.error('Kokoro TTS: Error preloading speech:', error);
      return { success: false, reason: 'generation_failed', error };
    }

    if (!result || !result.success) {
      return { success: false, reason: 'generation_failed' };
    }

    return {
      success: true,
      audioData: result.audioData,
      text,
      duration: result.duration || 0
    };
  }

  /**
   * Speak text using preloaded audio. Any speech already playing is stopped.
   * The callback is invoked exactly once, on completion or on the first failure.
   *
   * @param {Object} preloadData - Preloaded audio data (needs `audioData`)
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Whether playback was started
   */
  speakPreloaded(preloadData, callback = null) {
    if (!this.isReady || !preloadData || !preloadData.audioData) {
      if (callback) {
        callback({ success: false, reason: 'invalid_data' });
      }
      return false;
    }

    // Stop any ongoing speech
    this.stop();

    // Create audio from blob
    const audioBlob = new Blob([preloadData.audioData], { type: 'audio/mp3' });
    const audioUrl = URL.createObjectURL(audioBlob);
    const audio = new Audio(audioUrl);
    audio.volume = this.options.volume;
    audio.playbackRate = this.options.rate;

    // A failing element can fire `error` AND reject play(); report only once.
    let finished = false;
    const finish = (result) => {
      if (finished) {
        return;
      }
      finished = true;
      // Don't clear the flag if a newer utterance has already replaced this one
      if (this.currentAudio === audio) {
        this.isSpeaking = false;
      }
      URL.revokeObjectURL(audioUrl);
      if (callback) {
        callback(result);
      }
    };

    audio.onended = () => finish({ success: true });
    audio.onerror = (error) => finish({ success: false, reason: 'playback_error', error });

    // Start playback
    this.currentAudio = audio;
    this.isSpeaking = true;
    audio.play().catch((error) => finish({ success: false, reason: 'playback_error', error }));

    return true;
  }

  /**
   * Speak text: generate the audio through the iframe, then play it.
   * Returns immediately; the outcome is delivered through the callback.
   *
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Whether generation was started
   */
  speak(text, callback = null) {
    if (!this.isReady) {
      if (callback) {
        callback({ success: false, reason: 'not_ready' });
      }
      return false;
    }

    // Preprocess text
    const processedText = this.preprocessText(text);

    // Generate, then reuse the preloaded-playback path so both share one implementation
    this.generateSpeech(processedText)
      .then((result) => {
        if (result && result.success && result.audioData) {
          this.speakPreloaded(result, callback);
        } else if (callback) {
          callback({ success: false, reason: 'generation_failed' });
        }
      })
      .catch((error) => {
        if (callback) {
          callback({ success: false, reason: 'generation_error', error });
        }
      });

    return true;
  }

  /**
   * Generate speech using the iframe.
   * The returned promise is settled by handleIframeMessage() when the matching
   * `kokoro:speech-generated` message arrives; it rejects if that message carries an error.
   *
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - Resolves with `{ success, audioData, duration }`
   */
  async generateSpeech(text) {
    if (!this.isReady || !this.iframe || !this.iframe.contentWindow) {
      return { success: false, reason: 'not_ready' };
    }

    // Process text
    const processedText = this.preprocessText(text);

    return new Promise((resolve, reject) => {
      // Generate unique ID for this request
      const id = this.generationCounter++;

      // Store resolver functions
      this.pendingGenerations.set(id, { resolve, reject });

      // Send request to iframe
      try {
        this.iframe.contentWindow.postMessage({
          type: 'kokoro:generate-speech',
          text: processedText,
          id,
          voiceId: this.currentVoice ? this.currentVoice.id : null
        }, '*');
      } catch (error) {
        // The request never left; don't keep a resolver that can never settle
        this.pendingGenerations.delete(id);
        reject(error);
      }
    });
  }

  /**
   * Stop current speech and release its audio resources.
   *
   * @returns {boolean} - Success status (true when nothing was playing)
   */
  stop() {
    const audio = this.currentAudio;
    if (!audio) {
      return true;
    }

    // Clear state first so a failing pause() cannot leave the module stuck "speaking"
    this.currentAudio = null;
    this.isSpeaking = false;

    try {
      audio.pause();
      audio.currentTime = 0;
      // A stopped clip never fires `ended`, so release its object URL here
      if (typeof audio.src === 'string' && audio.src.startsWith('blob:')) {
        URL.revokeObjectURL(audio.src);
      }
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Error stopping speech:', error);
      return false;
    }
  }

  /**
   * Pause current speech.
   *
   * @returns {boolean} - Success status (true when nothing is playing)
   */
  pause() {
    if (!this.currentAudio) {
      return true;
    }

    try {
      this.currentAudio.pause();
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Error pausing speech:', error);
      return false;
    }
  }

  /**
   * Resume current speech.
   *
   * @returns {boolean} - Success status (false when there is nothing to resume)
   */
  resume() {
    if (!this.currentAudio) {
      return false;
    }

    try {
      const playback = this.currentAudio.play();
      // play() reports failures through its promise, which try/catch cannot see
      if (playback && typeof playback.catch === 'function') {
        playback.catch((error) => {
          console.error('Kokoro TTS: Error resuming speech:', error);
        });
      }
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Error resuming speech:', error);
      return false;
    }
  }

  /**
   * Get the built-in voice list (a fixed list; it does not depend on the locale).
   * A new array of new objects is returned on every call.
   *
   * @returns {Array} Default voices
   */
  getDefaultVoices() {
    return [
      // American Female voices
      { id: 'af_heart', name: 'Heart', lang: 'en-US', gender: 'female' },
      { id: 'af_daisy', name: 'Daisy', lang: 'en-US', gender: 'female' },
      { id: 'af_soft', name: 'Soft', lang: 'en-US', gender: 'female' },
      { id: 'af_glados', name: 'GLaDOS', lang: 'en-US', gender: 'female' },
      { id: 'af_southern_belle', name: 'Southern Belle', lang: 'en-US', gender: 'female' },
      { id: 'af_dramatic', name: 'Dramatic', lang: 'en-US', gender: 'female' },
      { id: 'af_valley_girl', name: 'Valley Girl', lang: 'en-US', gender: 'female' },
      { id: 'af_british', name: 'British', lang: 'en-US', gender: 'female' },
      { id: 'af_russian', name: 'Russian', lang: 'en-US', gender: 'female' },
      { id: 'af_german', name: 'German', lang: 'en-US', gender: 'female' },
      { id: 'af_cheeky_cute', name: 'Cheeky Cute', lang: 'en-US', gender: 'female' },
      // American Male voices
      { id: 'am_bruce', name: 'Bruce', lang: 'en-US', gender: 'male' },
      { id: 'am_announcer', name: 'Announcer', lang: 'en-US', gender: 'male' },
      { id: 'am_radio_host', name: 'Radio Host', lang: 'en-US', gender: 'male' },
      // British Female voices
      { id: 'bf_charlotte', name: 'Charlotte', lang: 'en-GB', gender: 'female' },
      { id: 'bf_elizabeth', name: 'Elizabeth', lang: 'en-GB', gender: 'female' },
      { id: 'bf_lily', name: 'Lily', lang: 'en-GB', gender: 'female' },
      { id: 'bf_olivia', name: 'Olivia', lang: 'en-GB', gender: 'female' },
      { id: 'bf_victoria', name: 'Victoria', lang: 'en-GB', gender: 'female' },
      // British Male voices
      { id: 'bm_william', name: 'William', lang: 'en-GB', gender: 'male' },
      { id: 'bm_arthur', name: 'Arthur', lang: 'en-GB', gender: 'male' },
      { id: 'bm_george', name: 'George', lang: 'en-GB', gender: 'male' },
      { id: 'bm_harry', name: 'Harry', lang: 'en-GB', gender: 'male' },
      { id: 'bm_jack', name: 'Jack', lang: 'en-GB', gender: 'male' }
    ];
  }
}
// Self-registration: runs once when this script is evaluated.
// The registry is deliberately looked up on `window` (never imported directly).
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register Kokoro TTS Module');
} else {
  try {
    // Construct first so that a constructor failure is reported by the catch below
    const kokoroInstance = new KokoroTTSModule();
    window.moduleRegistry.register(kokoroInstance);
    console.log('Kokoro TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register Kokoro TTS Module:', registrationError);
  }
}
+14
View File
@@ -96,6 +96,12 @@ const ModuleLoader = (function() {
*/ */
async function loadModuleScripts() { async function loadModuleScripts() {
// Define dependency scripts that need to be loaded first but aren't modules themselves
const dependenciesToLoad = [
{ script: '/js/api-tts-module-base.js' }, // Abstract base class, not a module
{ script: '/js/tts-handler-module.js' } // Abstract base class for TTS handlers, not a module
];
// Define modules with their weights // Define modules with their weights
const modulesToLoad = [ const modulesToLoad = [
// Core functionality modules // Core functionality modules
@@ -108,6 +114,10 @@ const ModuleLoader = (function() {
// Audio and TTS modules // Audio and TTS modules
{ id: 'audio-manager', script: '/js/audio-manager.js', weight: 60 }, { id: 'audio-manager', script: '/js/audio-manager.js', weight: 60 },
{ id: 'kokoro', script: '/js/kokoro-tts-module.js', weight: 65 },
{ id: 'browser', script: '/js/browser-tts-module.js', weight: 65 },
{ id: 'elevenlabs', script: '/js/elevenlabs-tts-module.js', weight: 65 },
{ id: 'openai', script: '/js/openai-tts-module.js', weight: 65 },
{ id: 'tts-factory', script: '/js/tts-factory.js', weight: 70 }, // TTSFactory must be loaded before TTSPlayer { id: 'tts-factory', script: '/js/tts-factory.js', weight: 70 }, // TTSFactory must be loaded before TTSPlayer
{ id: 'tts', script: '/js/tts-player.js', weight: 75 }, { id: 'tts', script: '/js/tts-player.js', weight: 75 },
@@ -134,6 +144,10 @@ const ModuleLoader = (function() {
createModuleListItem(module.id, getModuleNameFromId(module.id)); createModuleListItem(module.id, getModuleNameFromId(module.id));
}); });
// Load dependencies first
const loadDependencies = dependenciesToLoad.map(dependency => loadScript(dependency.script));
await Promise.all(loadDependencies);
// Load each module script // Load each module script
const loadPromises = modulesToLoad.map(module => loadScript(module.script)); const loadPromises = modulesToLoad.map(module => loadScript(module.script));
return Promise.all(loadPromises); return Promise.all(loadPromises);
+255
View File
@@ -0,0 +1,255 @@
/**
* OpenAITTSModule
* Provides TTS via OpenAI API
*/
import { ApiTTSModuleBase } from './api-tts-module-base.js';
export class OpenAITTSModule extends ApiTTSModuleBase {
  /**
   * Create the module with OpenAI's default voice options and fixed voice list.
   */
  constructor() {
    super('openai', 'OpenAI TTS');

    // Voice options specific to OpenAI
    this.voiceOptions = {
      voice: 'alloy', // Default voice for OpenAI
      model: 'tts-1', // Standard model
      speed: 1.0,
      response_format: 'mp3' // OpenAI supports mp3, opus, aac, and flac (not wav)
    };

    // Predefined voices - OpenAI has a fixed set
    this.voices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' }
    ];
  }

  /**
   * Get the default API base URL for OpenAI
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.openai.com/v1';
  }

  /**
   * Initialize the module: run the parent initialization, resolve the API key
   * and restore saved voice options.
   *
   * Preferences are read from the `tts` category under `openai_*` keys, the
   * same keys setVoiceOptions() writes; the older `openai` category is still
   * consulted as a fallback.
   *
   * @returns {Promise<boolean>} - Resolves with success status (false without an API key)
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing OpenAI TTS');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('OpenAI TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('OpenAI TTS: Required dependency persistence-manager not found');
        return false;
      }

      const hasValue = (value) => value !== undefined && value !== null && value !== '';
      // Current location first, legacy location second, caller's default last
      const readPreference = (key, fallback) => {
        const current = persistenceManager.getPreference('tts', `openai_${key}`);
        if (hasValue(current)) {
          return current;
        }
        const legacy = persistenceManager.getPreference('openai', key);
        return hasValue(legacy) ? legacy : fallback;
      };

      // The parent may already have loaded the key; don't discard it
      const apiKey = this.apiKey || readPreference('api_key', '');
      if (!apiKey) {
        console.error('OpenAI TTS: API key not configured');
        return false;
      }
      this.apiKey = apiKey;

      // Load preferences
      this.voiceOptions.voice = readPreference('voice', this.voiceOptions.voice);
      this.voiceOptions.model = readPreference('model', this.voiceOptions.model);

      const preferredSpeed = readPreference('speed', this.voiceOptions.speed);
      if (typeof preferredSpeed === 'number') {
        this.voiceOptions.speed = preferredSpeed;
      }

      const preferredFormat = readPreference('format', this.voiceOptions.response_format);
      if (['mp3', 'opus', 'aac', 'flac'].includes(preferredFormat)) {
        this.voiceOptions.response_format = preferredFormat;
      }

      // Setup available voices.
      // NOTE(review): getAvailableVoices() is not defined in this class; it is
      // used only if an ancestor provides it, otherwise the predefined list stays.
      if (typeof this.getAvailableVoices === 'function') {
        const availableVoices = this.getAvailableVoices();
        if (Array.isArray(availableVoices) && availableVoices.length > 0) {
          this.voices = availableVoices;
        }
      }

      this.isReady = true;
      this.reportProgress(100, 'OpenAI TTS initialized');
      return true;
    } catch (error) {
      console.error('OpenAI TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Load available voices
   * @returns {Promise<boolean>} - Always resolves true: the voice set is fixed, nothing is fetched
   */
  async loadVoices() {
    return true;
  }

  /**
   * Select a voice for the given locale.
   * The locale is currently ignored and the default voice is always selected.
   *
   * @param {string} locale - Locale code (unused)
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice
   * @returns {boolean} - Success status
   */
  selectDefaultVoice() {
    this.voiceOptions.voice = 'alloy';
    return true;
  }

  /**
   * Generate speech audio data using OpenAI API.
   * Never rejects: failures are reported through the result object.
   *
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` or
   *   `{ success: false, reason, error? }`
   */
  async generateSpeechAudio(text) {
    if (!text || !this.apiKey) {
      return {
        success: false,
        reason: 'missing_api_key_or_text'
      };
    }

    try {
      // Process the text
      const processedText = this.preprocessText(text);

      // Create request payload
      const payload = {
        model: this.voiceOptions.model || 'tts-1',
        input: processedText,
        voice: this.voiceOptions.voice || 'alloy',
        response_format: this.voiceOptions.response_format || 'mp3',
        speed: this.voiceOptions.speed || 1.0
      };

      // The base URL is user-configurable; tolerate a trailing slash
      const baseUrl = String(this.apiBaseUrl || '').replace(/\/+$/, '');

      // Make API request
      const response = await fetch(`${baseUrl}/audio/speech`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      // Array buffer for consistency with other modules
      const arrayBuffer = await response.arrayBuffer();

      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('OpenAI TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options and persist each accepted value under the `tts`
   * category (`openai_voice`, `openai_speed`, `openai_model`, `openai_format`).
   *
   * @param {Object} options - Voice options: `voice`, `speed` (clamped to 0.5-2.0),
   *   `model`, `response_format` (mp3, opus, aac or flac; other values are ignored)
   */
  setVoiceOptions(options = {}) {
    const persist = (key, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', key, value);
      }
    };

    // Handle common options
    if (options.voice) {
      this.voiceOptions.voice = options.voice;
      persist('openai_voice', options.voice);
    }

    if (typeof options.speed === 'number') {
      this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
      persist('openai_speed', this.voiceOptions.speed);
    }

    // Handle OpenAI-specific options
    if (options.model) {
      this.voiceOptions.model = options.model;
      persist('openai_model', options.model);
    }

    if (options.response_format) {
      // Ensure valid format: mp3, opus, aac, or flac
      const validFormats = ['mp3', 'opus', 'aac', 'flac'];
      if (validFormats.includes(options.response_format)) {
        this.voiceOptions.response_format = options.response_format;
        persist('openai_format', options.response_format);
      }
    }
  }
}
// Self-registration: runs once when this script is evaluated.
// The registry is deliberately looked up on `window` (never imported directly).
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register OpenAI TTS Module');
} else {
  try {
    // Construct first so that a constructor failure is reported by the catch below
    const openAIInstance = new OpenAITTSModule();
    window.moduleRegistry.register(openAIInstance);
    console.log('OpenAI TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register OpenAI TTS Module:', registrationError);
  }
}
+36 -8
View File
@@ -632,13 +632,19 @@ class OptionsUIModule extends BaseModule {
* Show the options modal * Show the options modal
*/ */
show() { show() {
if (!this.modal) return;
// Reload preferences before showing
this.loadPreferences();
// Show modal // Show modal
this.modal.style.display = 'flex'; if (this.modal) {
this.modal.style.display = 'flex';
// Refresh TTS dropdown
this.populateTtsSystems();
// Make sure the UI reflects the current voice
this.populateVoices();
// Update API settings visibility based on the current selection
this.updateApiSettingsVisibility();
}
} }
/** /**
@@ -669,6 +675,10 @@ class OptionsUIModule extends BaseModule {
const ttsFactory = this.getModule('tts-factory'); const ttsFactory = this.getModule('tts-factory');
if (!ttsFactory) return; if (!ttsFactory) return;
// Debug TTS handlers to see what's happening
console.log('Options UI: Debugging TTS handlers before populating dropdown');
ttsFactory.debugTTSHandlers();
// Clear existing options // Clear existing options
this.elements.ttsSystem.innerHTML = ''; this.elements.ttsSystem.innerHTML = '';
@@ -1123,9 +1133,27 @@ class OptionsUIModule extends BaseModule {
const available = event.detail?.available || false; const available = event.detail?.available || false;
// Update the TTS options visibility // DON'T hide the TTS section completely, as this prevents configuring API keys
// Instead, just mark it visually (we'll keep controls accessible)
if (this.elements.ttsSection) { if (this.elements.ttsSection) {
this.elements.ttsSection.style.display = available ? 'block' : 'none'; // Set a visual indicator that TTS is not working, but keep it visible
this.elements.ttsSection.classList.toggle('tts-unavailable', !available);
// Add status message if not available
if (!available && !this.elements.ttsUnavailableMessage) {
const statusDiv = document.createElement('div');
statusDiv.className = 'tts-status-message';
statusDiv.innerHTML = '<strong>TTS Unavailable</strong>: Check logs for details. You can still configure API keys below.';
statusDiv.style.color = '#ca3c3c';
statusDiv.style.padding = '5px 0';
statusDiv.style.marginBottom = '10px';
this.elements.ttsUnavailableMessage = statusDiv;
// Insert at the top of the TTS section
this.elements.ttsSection.insertBefore(statusDiv, this.elements.ttsSection.firstChild);
} else if (available && this.elements.ttsUnavailableMessage) {
// Remove the message if TTS becomes available
this.elements.ttsUnavailableMessage.remove();
this.elements.ttsUnavailableMessage = null;
}
} }
// Update the TTS system dropdown // Update the TTS system dropdown
+862 -486
View File
File diff suppressed because it is too large Load Diff
+202
View File
@@ -0,0 +1,202 @@
/**
* TTSHandlerModule Base Class
* Base class for all TTS handler modules
*/
import { BaseModule } from './base-module.js';
export class TTSHandlerModule extends BaseModule {
  /**
   * @param {string} id - Handler identifier, also used as the preference key prefix
   * @param {string} name - Human-readable name used in log and progress messages
   */
  constructor(id, name) {
    super(id, name);

    // Common TTS handler properties
    this.isReady = false;
    this.isSpeaking = false;
    this.currentUtterance = null;
    this.voices = [];
    this.currentVoice = null;
    this.defaultVoice = null;
    this.speechRate = 1.0;
    this.pitch = 1.0;
    this.volume = 1.0;

    // Common dependencies for TTS handlers
    this.dependencies = ['persistence-manager', 'localization'];

    // Bind common methods
    this.bindMethods([
      'speak',
      'stop',
      'getVoices',
      'setVoice',
      'configure',
      'generateSpeech'
    ]);
  }

  /**
   * Get the handler ID
   * @returns {string} - The handler ID
   */
  getId() {
    return this.id;
  }

  /**
   * Initialize the TTS handler: load the shared preferences and install
   * event listeners. Does not set `isReady`; subclasses do that themselves.
   *
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    try {
      this.reportProgress(20, `Initializing ${this.name}`);

      // Check for required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error(`${this.name}: Persistence Manager dependency not found`);
        return false;
      }

      // Load preferences
      this.loadPreferences(persistenceManager);

      // Set up event listeners
      this.setupEventListeners();

      return true;
    } catch (error) {
      console.error(`${this.name}: Initialization error`, error);
      return false;
    }
  }

  /**
   * Load the shared rate/pitch/volume preferences (each defaults to 1.0).
   * @param {Object} persistenceManager - The persistence manager module
   */
  loadPreferences(persistenceManager) {
    this.speechRate = persistenceManager.getPreference('tts', 'rate', 1.0);
    this.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
    this.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
  }

  /**
   * Set up common event listeners
   */
  setupEventListeners() {
    // To be implemented by subclasses if needed
  }

  /**
   * Check if the handler is ready
   * @returns {boolean} - Whether the handler is ready
   */
  isHandlerReady() {
    return this.isReady;
  }

  /**
   * Check if the handler is currently speaking
   * @returns {boolean} - Whether the handler is speaking
   */
  isSpeakingNow() {
    return this.isSpeaking;
  }

  /**
   * Get available voices
   * @returns {Promise<Array>} - Resolves with array of voice objects
   */
  async getVoices() {
    return this.voices;
  }

  /**
   * Set the voice to use
   * @param {string} voiceId - Voice identifier
   * @returns {boolean} - Success status (always false here; subclasses override)
   */
  setVoice(voiceId) {
    // To be implemented by subclasses
    return false;
  }

  /**
   * Configure TTS parameters and persist them when anything changed.
   *
   * @param {Object} options - Configuration options: `voice`, `speed`, `pitch`, `volume`.
   *   `pitch` and `volume` accept 0; a missing options object is a no-op.
   * @returns {boolean} - Whether any setting changed
   */
  configure(options) {
    if (!options) {
      return false;
    }

    let changed = false;

    if (options.voice && options.voice !== this.currentVoice) {
      this.setVoice(options.voice);
      changed = true;
    }

    if (options.speed && options.speed !== this.speechRate) {
      this.speechRate = options.speed;
      changed = true;
    }

    // 0 is a meaningful pitch/volume (e.g. muted), so a plain truthiness
    // test would wrongly discard it.
    const isProvided = (value) => value === 0 || Boolean(value);

    if (isProvided(options.pitch) && options.pitch !== this.pitch) {
      this.pitch = options.pitch;
      changed = true;
    }

    if (isProvided(options.volume) && options.volume !== this.volume) {
      this.volume = options.volume;
      changed = true;
    }

    // Save preferences if changed
    if (changed) {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', 'rate', this.speechRate);
        persistenceManager.updatePreference('tts', 'pitch', this.pitch);
        persistenceManager.updatePreference('tts', 'volume', this.volume);

        if (this.currentVoice) {
          persistenceManager.updatePreference('tts', `${this.id}_voice`, this.currentVoice);
        }
      }
    }

    return changed;
  }

  /**
   * Speak text. The base implementation only reports `not_implemented`
   * (asynchronously, through the callback).
   *
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Success status
   */
  speak(text, callback) {
    // To be implemented by subclasses
    console.error(`${this.name}: speak() method not implemented`);
    if (callback) {
      setTimeout(() => callback({ success: false, reason: 'not_implemented' }), 0);
    }
    return false;
  }

  /**
   * Stop speaking
   * @returns {boolean} - Success status (always false here; subclasses override)
   */
  stop() {
    // To be implemented by subclasses
    return false;
  }

  /**
   * Generate speech audio data
   * @param {string} text - Text to generate speech for
   * @param {Object} options - Generation options
   * @returns {Promise<Object>} - Resolves with audio data; the base implementation
   *   resolves with `{ success: false, reason: 'not_implemented' }`
   */
  async generateSpeech(text, options = {}) {
    // To be implemented by subclasses
    return { success: false, reason: 'not_implemented' };
  }
}
+36 -101
View File
@@ -59,83 +59,28 @@
} }
} }
// Create a global object to store Kokoro instance // Create a simple loader object to handle the Kokoro instance
window.KokoroLoader = { window.KokoroLoader = {
loaded: false,
error: null,
instance: null, instance: null,
kokoroTTS: null, kokoroTTS: null,
voices: null, initialized: false,
callbacks: [],
progress: 0,
progressMessage: 'Initializing...',
// Register a callback for when Kokoro is loaded
onLoad: function(callback) {
if (this.loaded) {
callback(this.instance);
} else if (this.error) {
callback(null, this.error);
} else {
this.callbacks.push(callback);
}
},
// Update progress // Update progress
updateProgress: function(progress, message) { updateProgress: function(progress, message) {
this.progress = progress;
this.progressMessage = message || 'Loading...';
const progressPercent = Math.round(progress * 100); const progressPercent = Math.round(progress * 100);
document.getElementById('status').textContent = `${this.progressMessage} (${isNaN(progressPercent) ? 0 : progressPercent}%)`; document.getElementById('status').textContent = `${message} (${progressPercent}%)`;
log(`Progress: ${this.progressMessage} (${isNaN(progressPercent) ? 0 : progressPercent}%)`); log(`Progress: ${message} (${progressPercent}%)`);
// Notify parent window // Only notify parent if progress is valid
if (window.parent !== window) { if (progress !== undefined && !isNaN(progress) && window.parent !== window) {
window.parent.postMessage({ window.parent.postMessage({
type: 'kokoro-progress', type: 'kokoro-progress',
progress: isNaN(progress) ? 0 : progress, progress: progress,
message: this.progressMessage message: message
}, '*'); }, '*');
} }
}, },
// Get default voices
getDefaultVoices: function() {
return [
// American Female voices
{ id: 'af_heart', name: 'Heart', lang: 'en-US', gender: 'female' },
{ id: 'af_daisy', name: 'Daisy', lang: 'en-US', gender: 'female' },
{ id: 'af_soft', name: 'Soft', lang: 'en-US', gender: 'female' },
{ id: 'af_glados', name: 'GLaDOS', lang: 'en-US', gender: 'female' },
{ id: 'af_southern_belle', name: 'Southern Belle', lang: 'en-US', gender: 'female' },
{ id: 'af_dramatic', name: 'Dramatic', lang: 'en-US', gender: 'female' },
{ id: 'af_valley_girl', name: 'Valley Girl', lang: 'en-US', gender: 'female' },
{ id: 'af_british', name: 'British', lang: 'en-US', gender: 'female' },
{ id: 'af_russian', name: 'Russian', lang: 'en-US', gender: 'female' },
{ id: 'af_german', name: 'German', lang: 'en-US', gender: 'female' },
{ id: 'af_cheeky_cute', name: 'Cheeky Cute', lang: 'en-US', gender: 'female' },
// American Male voices
{ id: 'am_bruce', name: 'Bruce', lang: 'en-US', gender: 'male' },
{ id: 'am_announcer', name: 'Announcer', lang: 'en-US', gender: 'male' },
{ id: 'am_radio_host', name: 'Radio Host', lang: 'en-US', gender: 'male' },
// British Female voices
{ id: 'bf_charlotte', name: 'Charlotte', lang: 'en-GB', gender: 'female' },
{ id: 'bf_elizabeth', name: 'Elizabeth', lang: 'en-GB', gender: 'female' },
{ id: 'bf_lily', name: 'Lily', lang: 'en-GB', gender: 'female' },
{ id: 'bf_olivia', name: 'Olivia', lang: 'en-GB', gender: 'female' },
{ id: 'bf_victoria', name: 'Victoria', lang: 'en-GB', gender: 'female' },
// British Male voices
{ id: 'bm_william', name: 'William', lang: 'en-GB', gender: 'male' },
{ id: 'bm_arthur', name: 'Arthur', lang: 'en-GB', gender: 'male' },
{ id: 'bm_george', name: 'George', lang: 'en-GB', gender: 'male' },
{ id: 'bm_harry', name: 'Harry', lang: 'en-GB', gender: 'male' },
{ id: 'bm_jack', name: 'Jack', lang: 'en-GB', gender: 'male' }
];
},
// Initialize Kokoro // Initialize Kokoro
init: async function() { init: async function() {
try { try {
@@ -144,77 +89,56 @@
// Store the KokoroTTS class // Store the KokoroTTS class
this.kokoroTTS = KokoroTTS; this.kokoroTTS = KokoroTTS;
log('Kokoro library loaded successfully', 'success'); log('Kokoro library loaded', 'success');
this.updateProgress(0.3, 'Initializing Kokoro model...'); this.updateProgress(0.3, 'Initializing Kokoro model...');
// Initialize the model // Initialize the model
const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX"; const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
this.instance = await this.kokoroTTS.from_pretrained(model_id, { this.instance = await this.kokoroTTS.from_pretrained(model_id, {
dtype: "q8", // Use quantized model for better performance dtype: "q8", // Use quantized model for better performance
device: "wasm", // Use WebAssembly for compatibility device: "webgpu", // Use WebGPU for better performance
progress_callback: (progress) => { progress_callback: (progress) => {
// Map progress from 0-1 to 30-90 // Skip progress updates if progress is NaN/undefined (cache loading)
if (progress === undefined || isNaN(progress)) {
log('Model loaded from cache', 'info');
return;
}
// Map progress from 0-1 to 30-90%
const mappedProgress = 0.3 + (progress * 0.6); const mappedProgress = 0.3 + (progress * 0.6);
this.updateProgress(mappedProgress, `Loading Kokoro model: ${Math.round(progress * 100)}%`); this.updateProgress(mappedProgress, `Loading Kokoro model: ${Math.round(progress * 100)}%`);
} }
}); });
// Fetch available voices log('Model initialized successfully', 'success');
log('Fetching available voices...');
this.updateProgress(0.8, 'Fetching voices...');
// Use default voices directly since the list_voices method is unreliable
log('Using predefined voice list instead of attempting to fetch from model');
this.voices = this.getDefaultVoices();
log(`Using ${this.voices.length} predefined voices`, 'success');
log('Testing Kokoro with a simple text');
this.updateProgress(0.95, 'Testing Kokoro...');
// Test with a simple text
// Use the first available voice for testing
const testVoice = this.voices && this.voices.length > 0 ? this.voices[0].id : 'af_heart';
await this.instance.generate('Test', { voice: testVoice });
log('Kokoro initialized successfully', 'success');
this.loaded = true;
this.updateProgress(1.0, 'Kokoro ready'); this.updateProgress(1.0, 'Kokoro ready');
this.initialized = true;
// Notify parent window // Notify parent window of successful initialization
if (window.parent !== window) { if (window.parent !== window) {
log('Notifying parent window of successful initialization'); log('Notifying parent window of successful initialization');
window.parent.postMessage({ window.parent.postMessage({
type: 'kokoro-ready', type: 'kokoro:ready',
success: true, success: true
voices: this.voices
}, '*'); }, '*');
} }
// Call all callbacks
log(`Calling ${this.callbacks.length} registered callbacks`);
this.callbacks.forEach(callback => callback(this.instance));
document.getElementById('status').textContent = 'Kokoro loaded and ready!'; document.getElementById('status').textContent = 'Kokoro loaded and ready!';
} catch (error) { } catch (error) {
const errorMsg = error.message || 'Unknown error'; const errorMsg = error.message || 'Unknown error';
log(`Error initializing Kokoro: ${errorMsg}`, 'error'); log(`Error initializing Kokoro: ${errorMsg}`, 'error');
console.error('Error initializing Kokoro:', error); console.error('Error initializing Kokoro:', error);
this.error = error;
// Notify parent window // Notify parent window
if (window.parent !== window) { if (window.parent !== window) {
log('Notifying parent window of initialization failure'); log('Notifying parent window of initialization failure');
window.parent.postMessage({ window.parent.postMessage({
type: 'kokoro-ready', type: 'kokoro:ready',
success: false, success: false,
error: errorMsg error: errorMsg
}, '*'); }, '*');
} }
// Call all callbacks with error
log(`Calling ${this.callbacks.length} registered callbacks with error`);
this.callbacks.forEach(callback => callback(null, error));
document.getElementById('status').textContent = `Error loading Kokoro: ${errorMsg}`; document.getElementById('status').textContent = `Error loading Kokoro: ${errorMsg}`;
} }
} }
@@ -232,9 +156,20 @@
const data = event.data; const data = event.data;
if (data.type === 'kokoro-generate') { if (data.type === 'kokoro:initialize') {
// If we're already initialized, just send the ready message
if (window.KokoroLoader.initialized) {
log('Already initialized, sending ready message');
window.parent.postMessage({
type: 'kokoro:ready',
success: true
}, '*');
}
// Otherwise init() will handle sending the ready message when done
}
else if (data.type === 'kokoro-generate') {
// Generate speech in a non-blocking way // Generate speech in a non-blocking way
if (!window.KokoroLoader.loaded) { if (!window.KokoroLoader.initialized || !window.KokoroLoader.instance) {
log(`Cannot process generation request ${data.id}: Kokoro not loaded`, 'error'); log(`Cannot process generation request ${data.id}: Kokoro not loaded`, 'error');
window.parent.postMessage({ window.parent.postMessage({
type: 'kokoro-generated', type: 'kokoro-generated',