Fix Kokoro TTS integration issues: Remove API key requirement and ensure system-specific options display correctly
This commit is contained in:
@@ -0,0 +1,393 @@
|
||||
/**
|
||||
* API TTS Module Base Class
|
||||
* Base class for API-based TTS modules
|
||||
*/
|
||||
import { TTSHandlerModule } from './tts-handler-module.js';
|
||||
|
||||
/**
 * Base class for TTS modules that synthesize speech through a remote API.
 *
 * It loads the provider's API key and base URL from the 'tts' preference
 * group, keeps them in sync with the `tts:api:keyChanged` and
 * `tts:api:urlChanged` document events, and plays generated audio through an
 * `Audio` element. Subclasses are expected to override
 * `getDefaultApiBaseUrl()`, `loadVoices()`, `selectVoiceForLocale()` and
 * `generateSpeechAudio()`.
 */
export class ApiTTSModuleBase extends TTSHandlerModule {
    /**
     * @param {string} id - Module identifier; also the prefix of this
     *     provider's preference keys (`<id>_api_key`, `<id>_api_url`, `<id>_voice`).
     * @param {string} name - Display name used in progress and log messages.
     */
    constructor(id, name) {
        super(id, name);

        // Basic voice options
        this.voiceOptions = {
            speed: 1.0,
            voice: null
        };

        // API settings
        this.apiKey = '';
        this.apiBaseUrl = '';

        // State: the Audio element currently playing, if any
        this.currentAudio = null;

        // Bind additional methods
        this.bindMethods([
            'handleApiKeyChanged',
            'handleApiUrlChanged',
            'speakPreloaded',
            'loadVoices',
            'selectVoiceForLocale',
            'selectDefaultVoice',
            'generateSpeechAudio',
            'preprocessText'
        ]);
    }

    /**
     * Initialize the API TTS module: load API settings, subscribe to setting
     * change events, load voices and pick a voice.
     *
     * Any exception raised along the way is logged and reported as `false`
     * so the returned promise never rejects.
     *
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize() {
        try {
            this.reportProgress(10, `Initializing ${this.name}`);

            // Initialize parent
            const parentInit = await super.initialize();
            if (!parentInit) {
                return false;
            }

            // Get persistence manager
            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error(`${this.name}: Required dependency 'persistence-manager' not found`);
                return false;
            }

            // Load API key from preferences
            this.apiKey = persistenceManager.getPreference('tts', `${this.id}_api_key`) || '';

            // Set up API base URL from preferences or use the provider default
            const defaultApiUrl = this.getDefaultApiBaseUrl();
            const savedApiUrl = persistenceManager.getPreference('tts', `${this.id}_api_url`);
            this.apiBaseUrl = savedApiUrl || defaultApiUrl;

            // If no API URL was saved in preferences, save the default
            if (!savedApiUrl && defaultApiUrl) {
                persistenceManager.updatePreference('tts', `${this.id}_api_url`, defaultApiUrl);
            }

            this.reportProgress(30, `${this.name} API configuration loaded`);

            // Set up event listeners for API key and URL changes
            document.addEventListener('tts:api:keyChanged', this.handleApiKeyChanged);
            document.addEventListener('tts:api:urlChanged', this.handleApiUrlChanged);

            // Load voices
            await this.loadVoices();
            this.reportProgress(50, `${this.name} voices loaded`);

            // Set up voice from preferences
            await this.setupVoiceFromPreferences();
            this.reportProgress(70, `${this.name} voice preferences configured`);

            // The module is only ready to speak once an API key is present,
            // but initialization still succeeds without one so the module
            // stays available for UI configuration.
            this.isReady = !!this.apiKey;

            this.reportProgress(100, `${this.name} initialization complete`);
            return true;
        } catch (error) {
            console.error(`${this.name}: Initialization error:`, error);
            this.isReady = false;
            return false;
        }
    }

    /**
     * Get the default API base URL for this provider
     * @returns {string} - Default API base URL (empty in the base class)
     */
    getDefaultApiBaseUrl() {
        // To be implemented by subclasses
        return '';
    }

    /**
     * Set up voice based on preferences and locale.
     *
     * Order of precedence: the saved `<id>_voice` preference (matched against
     * `voice.id`), then a locale-based choice, then the default voice.
     *
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async setupVoiceFromPreferences() {
        const persistenceManager = this.getModule('persistence-manager');
        const localization = this.getModule('localization');

        if (!persistenceManager || !localization) {
            return false;
        }

        // Get preferred voice ID from preferences
        const preferredVoiceId = persistenceManager.getPreference('tts', `${this.id}_voice`, '');

        // Get current locale
        const currentLocale = localization.getLocale();

        // If we have a preferred voice and available voices, use it
        if (preferredVoiceId && this.voices && this.voices.length > 0) {
            const voice = this.voices.find(v => v.id === preferredVoiceId);
            if (voice) {
                this.voiceOptions.voice = voice;
                return true;
            }
        }

        // Otherwise select a voice based on locale
        if (currentLocale) {
            return this.selectVoiceForLocale(currentLocale);
        }

        // Fall back to default voice
        return this.selectDefaultVoice();
    }

    /**
     * Load available voices from API
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async loadVoices() {
        // To be implemented by subclasses
        this.voices = [];
        return true;
    }

    /**
     * Select a voice for the given locale
     * @param {string} locale - Locale code
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        // To be implemented by subclasses
        return this.selectDefaultVoice();
    }

    /**
     * Select a default voice (the first loaded voice)
     * @returns {boolean} - Success status; false when no voices are loaded
     */
    selectDefaultVoice() {
        if (this.voices && this.voices.length > 0) {
            this.voiceOptions.voice = this.voices[0];
            return true;
        }
        return false;
    }

    /**
     * Generate speech audio blob for the given text using the API.
     * @param {string} text - The text to synthesize.
     * @returns {Promise<Object>} - A promise that resolves with the audio data object.
     */
    async generateSpeechAudio(text) {
        // To be implemented by subclasses
        return { success: false, reason: 'not_implemented' };
    }

    /**
     * Speak preloaded audio data
     *
     * The callback is invoked at most once per call. State (`isSpeaking`,
     * `currentAudio`) is only reset by the clip that is still current, so a
     * late event from an interrupted clip cannot mark a newer clip as stopped.
     *
     * @param {Object} preloadData - Preloaded audio data (`audioData` is required)
     * @param {Function} callback - Callback for when speech completes
     * @returns {boolean} - Success status
     */
    speakPreloaded(preloadData, callback = null) {
        if (!preloadData || !preloadData.audioData) {
            if (callback) {
                callback({ success: false, reason: 'invalid_data' });
            }
            return false;
        }

        // Stop any ongoing speech
        this.stop();

        // Create audio element backed by a blob URL
        const audioBlob = new Blob([preloadData.audioData], { type: 'audio/mp3' });
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);

        // 'error' and a rejected play() can both fire for the same failure;
        // make sure cleanup and the callback run only once.
        let settled = false;
        const finish = (result) => {
            if (settled) {
                return;
            }
            settled = true;
            URL.revokeObjectURL(audioUrl);

            // Interrupting a clip (pause() while play() is pending) rejects
            // its play() promise after the next clip has already started;
            // only the current clip may reset the shared state.
            if (this.currentAudio === audio) {
                this.currentAudio = null;
                this.isSpeaking = false;
            }

            if (callback) {
                callback(result);
            }
        };

        audio.onended = () => finish({ success: true });
        audio.onerror = (error) => finish({ success: false, reason: 'playback_error', error });

        // Start playback
        this.currentAudio = audio;
        this.isSpeaking = true;

        audio.play().catch((error) => finish({ success: false, reason: 'playback_error', error }));

        return true;
    }

    /**
     * Stop speaking
     * @returns {boolean} - Success status
     */
    stop() {
        const audio = this.currentAudio;
        if (!audio) {
            return true;
        }

        // Reset state up front so a failing pause() cannot leave the module
        // believing it is still speaking.
        this.currentAudio = null;
        this.isSpeaking = false;

        try {
            audio.pause();
            audio.currentTime = 0;
            return true;
        } catch (error) {
            console.error(`${this.name}: Error stopping speech:`, error);
            return false;
        } finally {
            // pause() does not fire 'ended', so the blob URL created by
            // speakPreloaded() has to be released here.
            if (typeof audio.src === 'string' && audio.src.startsWith('blob:')) {
                URL.revokeObjectURL(audio.src);
            }
        }
    }

    /**
     * Speak text
     *
     * Generation is asynchronous: `true` only means the request was started;
     * the outcome is delivered through the callback.
     *
     * @param {string} text - Text to speak
     * @param {Function} callback - Callback for when speech completes
     * @returns {boolean} - Success status
     */
    speak(text, callback = null) {
        if (!this.isReady) {
            if (callback) {
                callback({ success: false, reason: 'not_ready' });
            }
            return false;
        }

        // Generate and play speech
        this.generateSpeechAudio(text).then(result => {
            if (result && result.success && result.audioData) {
                this.speakPreloaded({ audioData: result.audioData }, callback);
            } else if (callback) {
                callback({ success: false, reason: 'generation_failed' });
            }
        }).catch(error => {
            if (callback) {
                callback({ success: false, reason: 'generation_error', error });
            }
        });

        return true;
    }

    /**
     * Preload speech for later playback
     * @param {string} text - Text to preload
     * @returns {Promise<Object>} - Preloaded speech data, or a failure object
     *     with a `reason` of 'not_ready', 'generation_failed' or 'generation_error'
     */
    async preloadSpeech(text) {
        if (!this.isReady) {
            return { success: false, reason: 'not_ready' };
        }

        try {
            // Generate speech
            const result = await this.generateSpeechAudio(text);

            // speakPreloaded() rejects data without audio, so a "successful"
            // result without audio is treated as a failure here as well.
            if (!result || !result.success || !result.audioData) {
                return { success: false, reason: 'generation_failed' };
            }

            return {
                success: true,
                audioData: result.audioData,
                text,
                duration: result.duration || 0
            };
        } catch (error) {
            return { success: false, reason: 'generation_error', error };
        }
    }

    /**
     * Preprocess text for TTS: strip HTML tags, decode the most common HTML
     * entities, spell out bare ampersands, normalize whitespace and make sure
     * the text ends with sentence punctuation.
     *
     * @param {string} text - Text to preprocess
     * @returns {string} - Processed text; '' when nothing speakable remains
     */
    preprocessText(text) {
        if (!text) {
            return '';
        }

        // Remove HTML tags
        let processed = String(text).replace(/<[^>]*>/g, ' ');

        // Decode common entities first; otherwise the ampersand rule below
        // would turn "&amp;" into " and amp;". "&amp;" is decoded last so
        // that "&amp;lt;" is not decoded twice.
        processed = processed
            .replace(/&nbsp;/gi, ' ')
            .replace(/&lt;/gi, '<')
            .replace(/&gt;/gi, '>')
            .replace(/&quot;/gi, '"')
            .replace(/&(?:#39|apos);/gi, "'")
            .replace(/&amp;/gi, '&');

        // Replace special characters
        processed = processed.replace(/&/g, ' and ');

        // Normalize whitespace
        processed = processed.replace(/\s+/g, ' ').trim();

        // Markup-only input leaves nothing to speak; do not synthesize a lone "."
        if (!processed) {
            return '';
        }

        // Add trailing period if missing
        if (!/[.!?]$/.test(processed)) {
            processed += '.';
        }

        return processed;
    }

    /**
     * Handle API key change event
     *
     * Events for other providers are ignored. The key is trimmed before use,
     * and a value that looks like a URL is rejected.
     *
     * @param {Event} event - Event object (`detail.provider`, `detail.key`)
     */
    handleApiKeyChanged(event) {
        if (!event || !event.detail || event.detail.provider !== this.id) {
            return;
        }

        // Pasted keys frequently carry stray whitespace or a trailing newline
        const rawKey = event.detail.key;
        const newKey = rawKey == null ? '' : String(rawKey).trim();

        // Security check - never use a URL as an API key
        if (newKey.toLowerCase().startsWith('http')) {
            console.error(`${this.name}: Received URL instead of API key, ignoring it`);
            return;
        }

        // Update API key
        this.apiKey = newKey;

        // Save to preferences
        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager) {
            persistenceManager.updatePreference('tts', `${this.id}_api_key`, newKey);
        }

        // Update ready state
        this.isReady = !!this.apiKey;
    }

    /**
     * Handle API URL change event
     *
     * Events for other providers are ignored. An empty URL falls back to the
     * provider default. Surrounding whitespace and trailing slashes are
     * removed so that paths appended to the base URL do not contain "//".
     *
     * @param {Event} event - Event object (`detail.provider`, `detail.url`)
     */
    handleApiUrlChanged(event) {
        if (!event || !event.detail || event.detail.provider !== this.id) {
            return;
        }

        const rawUrl = event.detail.url;
        const requestedUrl = rawUrl == null ? '' : String(rawUrl).trim();
        const newUrl = (requestedUrl || this.getDefaultApiBaseUrl()).replace(/\/+$/, '');

        // Update API URL
        this.apiBaseUrl = newUrl;

        // Save to preferences
        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager) {
            persistenceManager.updatePreference('tts', `${this.id}_api_url`, newUrl);
        }
    }
}
|
||||
@@ -0,0 +1,570 @@
|
||||
/**
|
||||
* BrowserTTSModule for AI Interactive Fiction
|
||||
* Implementation using the browser's Web Speech API
|
||||
*/
|
||||
import { TTSHandlerModule } from './tts-handler-module.js';
|
||||
|
||||
/**
 * Whole-word markers used by inferVoiceGender(). They are matched against
 * complete words of the voice name, never as substrings.
 */
const MALE_VOICE_TERMS = ['male', 'man', 'guy', 'boy', 'mr', 'sir'];
const FEMALE_VOICE_TERMS = ['female', 'woman', 'lady', 'girl', 'ms', 'mrs', 'miss'];

/**
 * Normalize a language tag for comparison: lower-case, '-' as separator.
 * @param {string} tag - Language tag such as 'en-US' or 'en_US'
 * @returns {string} - Normalized tag ('' for missing input)
 */
function normalizeLangTag(tag) {
    return String(tag || '').toLowerCase().replace(/_/g, '-');
}

/**
 * Extract the primary language subtag ('en' from 'en-US').
 * @param {string} tag - Language tag
 * @returns {string} - Primary subtag in lower case
 */
function primaryLanguage(tag) {
    return normalizeLangTag(tag).split('-')[0];
}

/**
 * Copy voice, rate, pitch and volume from the module options to an utterance.
 * @param {SpeechSynthesisUtterance} utterance - Utterance to configure
 * @param {Object} options - The module's voiceOptions
 */
function configureUtterance(utterance, options) {
    // The placeholder entry created by loadVoices() is a plain object;
    // assigning it to utterance.voice throws a TypeError, so it is skipped
    // and the browser's own default voice is used instead.
    if (options.voice && !options.voice.isPlaceholder) {
        utterance.voice = options.voice;
    }

    utterance.rate = options.rate;
    utterance.pitch = options.pitch;
    utterance.volume = options.volume;
}

/**
 * Browser TTS Module - Uses the browser's Web Speech API for TTS
 */
export class BrowserTTSModule extends TTSHandlerModule {
    constructor() {
        super('browser', 'Browser TTS');

        // Voice options
        this.voiceOptions = {
            voice: null, // Will be set during initialization
            rate: 1.0,
            pitch: 1.0,
            volume: 1.0
        };

        // State
        this.available = false;
        this.currentUtterance = null;

        // Bind additional methods beyond those in TTSHandlerModule
        this.bindMethods([
            'onVoicesChanged',
            'loadVoices',
            'selectVoiceForLocale',
            'synthesizeToWav',
            'speakPreloaded',
            'speak',
            'preprocessText',
            'inferVoiceGender'
        ]);
    }

    /**
     * Initialize the browser TTS module
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize() {
        try {
            this.reportProgress(10, 'Initializing Browser TTS');

            // Check for browser support
            if (!window.speechSynthesis) {
                console.error('Browser TTS: Speech synthesis not available in this browser');
                return false;
            }

            this.reportProgress(30, 'Browser TTS supported');

            // Initialize parent
            const parentInit = await super.initialize();
            if (!parentInit) {
                console.error('Browser TTS: Parent initialization failed');
                return false;
            }

            // Get required dependencies
            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error('Browser TTS: Required dependency persistence-manager not found');
                return false;
            }

            const localization = this.getModule('localization');
            if (!localization) {
                console.error('Browser TTS: Required dependency localization not found');
                return false;
            }

            // Load voices
            const voicesLoaded = await this.loadVoices();
            if (!voicesLoaded) {
                console.error('Browser TTS: Failed to load voices');
                return false;
            }

            // Set speech options from preferences
            this.voiceOptions.rate = persistenceManager.getPreference('tts', 'rate', 1.0);
            this.voiceOptions.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
            this.voiceOptions.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
            const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');

            // Set voice based on current locale
            const currentLocale = localization.getLocale() || 'en-us';
            await this.selectVoiceForLocale(currentLocale, preferredVoice);

            // Listen for locale changes
            document.addEventListener('locale:changed', async (event) => {
                if (event.detail && event.detail.locale) {
                    await this.selectVoiceForLocale(event.detail.locale);
                }
            });

            // Listen for voices changed events
            if (window.speechSynthesis.onvoiceschanged !== undefined) {
                window.speechSynthesis.onvoiceschanged = this.onVoicesChanged;
            }

            this.isReady = true;
            this.available = true;
            this.reportProgress(100, 'Browser TTS initialized');

            return true;
        } catch (error) {
            console.error('Browser TTS: Initialization error:', error);
            this.isReady = false;
            this.available = false;
            return false;
        }
    }

    /**
     * Handle voices changed event: reload the voice list and re-select a
     * voice for the current locale and preferred voice.
     */
    async onVoicesChanged() {
        await this.loadVoices();

        // Re-select voice based on current locale
        const localization = this.getModule('localization');
        const persistenceManager = this.getModule('persistence-manager');

        if (localization && persistenceManager) {
            const currentLocale = localization.getLocale() || 'en-us';
            const preferredVoice = persistenceManager.getPreference('tts', 'browser_voice', '');
            await this.selectVoiceForLocale(currentLocale, preferredVoice);
        }
    }

    /**
     * Load available voices from the speech synthesis API
     *
     * When the list is empty at first, waits up to 3 seconds for a
     * `voiceschanged` notification. If no voices show up, a single
     * placeholder entry (flagged `isPlaceholder`) is stored instead.
     *
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async loadVoices() {
        try {
            this.reportProgress(40, 'Loading browser voices');

            const synth = window.speechSynthesis;

            // Try to get voices
            let voices = synth.getVoices();

            // If voices array is empty, wait for onvoiceschanged event
            if (!voices || voices.length === 0) {
                console.log('Browser TTS: No voices available immediately, waiting for voices to load...');

                voices = await new Promise((resolve) => {
                    // Remember whatever handler is installed (initialize()
                    // installs onVoicesChanged) so the temporary one below
                    // does not replace it permanently.
                    const previousHandler = synth.onvoiceschanged;
                    let timeout = null;

                    const finish = (loadedVoices) => {
                        clearTimeout(timeout);
                        synth.onvoiceschanged = previousHandler;
                        resolve(loadedVoices);
                    };

                    // Set a timeout in case voices never load
                    timeout = setTimeout(() => {
                        console.warn('Browser TTS: Timeout waiting for voices');
                        // Resolve with empty array instead of rejecting
                        finish([]);
                    }, 3000);

                    // Listen for voices changed event
                    synth.onvoiceschanged = () => {
                        const loadedVoices = synth.getVoices();
                        console.log(`Browser TTS: Voices loaded, found ${loadedVoices.length} voices`);
                        finish(loadedVoices);
                    };
                });
            }

            // Store voices
            this.voices = voices || [];

            // Log available voices for debugging
            console.log(`Browser TTS: Loaded ${this.voices.length} voices`);
            if (this.voices.length > 0) {
                console.log('Browser TTS: First few voices:', this.voices.slice(0, 3));
            }

            // If no voices available but speech synthesis is supported, still return true
            // Some browsers may not expose voices but still support speech synthesis
            if (this.voices.length === 0) {
                console.warn('Browser TTS: No voices available, but continuing with default voice');
                // Create a default voice entry. It only exists so the rest of
                // the module has something to list; it is never handed to an
                // utterance (see configureUtterance).
                this.voices = [{
                    default: true,
                    lang: 'en-US',
                    localService: true,
                    name: 'Default Voice',
                    voiceURI: 'default',
                    isPlaceholder: true
                }];
            }

            this.reportProgress(60, 'Browser voices loaded');
            return true;
        } catch (error) {
            console.error('Browser TTS: Error loading voices:', error);
            return false;
        }
    }

    /**
     * Set voice based on locale
     *
     * Precedence: preferred voice (by name or voiceURI), exact locale match,
     * same primary language, first available voice. Language tags are
     * compared case-insensitively with '_' treated as '-', and the language
     * fallback compares whole primary subtags ('fi' does not match 'fil').
     *
     * @param {string} locale - Locale code (e.g., 'en-us', 'de', 'fr')
     * @param {string} preferredVoice - Optional preferred voice name
     * @returns {Promise<boolean>} - Success status
     */
    async selectVoiceForLocale(locale = 'en-us', preferredVoice = '') {
        const normalizedLocale = normalizeLangTag(locale || 'en-us');
        const languageCode = normalizedLocale.split('-')[0];
        const voices = this.voices || [];

        const chosenVoice =
            (preferredVoice && voices.find(v => v.name === preferredVoice || v.voiceURI === preferredVoice)) ||
            voices.find(v => normalizeLangTag(v.lang) === normalizedLocale) ||
            voices.find(v => primaryLanguage(v.lang) === languageCode) ||
            voices[0];

        // No voices available
        if (!chosenVoice) {
            return false;
        }

        this.voiceOptions.voice = chosenVoice;
        return true;
    }

    /**
     * Speak text
     * @param {string} text - Text to speak
     * @param {Function} callback - Callback for when speech completes
     * @returns {boolean} - Success status
     */
    speak(text, callback = null) {
        if (!this.isReady || !window.speechSynthesis) {
            if (callback) {
                callback({ success: false, reason: 'not_ready' });
            }
            return false;
        }

        // Stop any ongoing speech
        this.stop();

        // Create and configure utterance
        const utterance = new SpeechSynthesisUtterance(this.preprocessText(text));
        configureUtterance(utterance, this.voiceOptions);

        // stop() cancels the previous utterance, whose end/error event can
        // arrive after this one has started; only the current utterance may
        // reset the shared speaking state.
        const finish = (result) => {
            if (this.currentUtterance === utterance) {
                this.currentUtterance = null;
                this.isSpeaking = false;
            }
            if (callback) {
                callback(result);
            }
        };

        utterance.onend = () => finish({ success: true });
        utterance.onerror = (error) => {
            console.error('Browser TTS: Speech error', error);
            finish({ success: false, reason: 'synthesis_error', error });
        };

        // Store current utterance
        this.currentUtterance = utterance;
        this.isSpeaking = true;

        // Start speaking
        window.speechSynthesis.speak(utterance);

        return true;
    }

    /**
     * Preload speech for a text
     * @param {string} text - Text to preload
     * @returns {Promise<Object>} - Preloaded speech data
     */
    async preloadSpeech(text) {
        if (!this.isReady || !window.speechSynthesis) {
            return { success: false, reason: 'not_ready' };
        }

        // Generate audio data
        const wavResult = await this.synthesizeToWav(text);

        if (!wavResult.success) {
            return { success: false, reason: 'synthesis_failed' };
        }

        return {
            success: true,
            audioData: wavResult.audioData,
            text,
            duration: wavResult.duration || 0
        };
    }

    /**
     * Speak the text while recording a MediaStream destination, and return
     * the recorded bytes.
     *
     * The returned promise never rejects; failures resolve with
     * `{ success: false, reason }`.
     *
     * NOTE(review): nothing in this method connects an audio source to the
     * recorded destination node, and the utterance is played through the
     * normal speech output, so the recording presumably contains no speech
     * while the text is spoken aloud during "preload". Confirm whether
     * preloading should synthesize at all (speakPreloaded() only uses the
     * text). The recorded container is whatever MediaRecorder produces,
     * which is presumably not WAV despite the method name.
     *
     * @param {string} text - Text to synthesize
     * @returns {Promise<Object>} - Object with audio data (ArrayBuffer)
     */
    async synthesizeToWav(text) {
        if (!this.isReady || !window.speechSynthesis) {
            return { success: false, reason: 'not_ready' };
        }

        const AudioContextClass = window.AudioContext || window.webkitAudioContext;
        if (!AudioContextClass) {
            return { success: false, reason: 'no_audio_context' };
        }

        // Process text for better synthesis
        const processedText = this.preprocessText(text);

        return new Promise((resolve) => {
            let audioContext = null;
            let timeoutId = null;
            let settled = false;

            // Resolve once and release the timer and the audio context;
            // browsers only allow a limited number of open contexts.
            const settle = (result) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timeoutId);
                if (audioContext && audioContext.state !== 'closed') {
                    const closing = audioContext.close();
                    if (closing && typeof closing.catch === 'function') {
                        closing.catch((closeError) => {
                            console.warn('Browser TTS: Failed to close audio context', closeError);
                        });
                    }
                }
                resolve(result);
            };

            try {
                audioContext = new AudioContextClass();

                // Create media stream destination and recorder
                const destination = audioContext.createMediaStreamDestination();
                const mediaRecorder = new MediaRecorder(destination.stream);
                const audioChunks = [];

                // stop() throws on an inactive recorder (e.g. after the timeout fired)
                const stopRecording = () => {
                    if (mediaRecorder.state !== 'inactive') {
                        mediaRecorder.stop();
                    }
                };

                mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        audioChunks.push(event.data);
                    }
                };

                mediaRecorder.onstop = () => {
                    // Create blob from chunks and convert it to an array buffer
                    const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType || 'audio/wav' });
                    const reader = new FileReader();
                    reader.onload = () => settle({ success: true, audioData: reader.result });
                    reader.onerror = () => settle({ success: false, reason: 'blob_read_error' });
                    reader.readAsArrayBuffer(audioBlob);
                };

                // Create and configure utterance
                const utterance = new SpeechSynthesisUtterance(processedText);
                configureUtterance(utterance, this.voiceOptions);

                utterance.onend = stopRecording;
                utterance.onerror = (error) => {
                    console.error('Browser TTS: Synthesis error', error);
                    stopRecording();
                    settle({ success: false, reason: 'synthesis_error' });
                };

                // Start recording, then speaking
                mediaRecorder.start();
                window.speechSynthesis.speak(utterance);

                // Set timeout in case onend never fires
                timeoutId = setTimeout(stopRecording, 30000); // 30-second timeout
            } catch (error) {
                console.error('Browser TTS: Unable to record synthesized speech', error);
                settle({ success: false, reason: 'recorder_unavailable' });
            }
        });
    }

    /**
     * Speak preloaded audio data
     *
     * For browser TTS the preloaded audio is not used; the stored text is
     * simply spoken again.
     *
     * @param {Object} preloadedData - Data from preloadSpeech
     * @param {Function} callback - Callback for when speech completes
     * @returns {boolean} - Success status
     */
    speakPreloaded(preloadedData, callback = null) {
        if (!preloadedData || !preloadedData.text) {
            console.error('Browser TTS: Invalid preloaded data');
            // Report the failure so a caller waiting on the callback is not left hanging
            if (callback) {
                callback({ success: false, reason: 'invalid_data' });
            }
            return false;
        }

        return this.speak(preloadedData.text, callback);
    }

    /**
     * Preprocess text for TTS: strip HTML tags, decode the most common HTML
     * entities, spell out bare ampersands and normalize whitespace.
     *
     * @param {string} text - Text to preprocess
     * @returns {string} - Processed text ('' for missing input)
     */
    preprocessText(text) {
        if (!text) {
            return '';
        }

        // Remove HTML tags
        let processed = String(text).replace(/<[^>]*>/g, ' ');

        // Decode common entities first; otherwise the ampersand rule below
        // would turn "&amp;" into " and amp;". "&amp;" is decoded last so
        // that "&amp;lt;" is not decoded twice.
        processed = processed
            .replace(/&nbsp;/gi, ' ')
            .replace(/&lt;/gi, '<')
            .replace(/&gt;/gi, '>')
            .replace(/&quot;/gi, '"')
            .replace(/&(?:#39|apos);/gi, "'")
            .replace(/&amp;/gi, '&');

        // Replace special characters with their spoken equivalents
        processed = processed.replace(/&/g, ' and ');

        // Normalize whitespace
        return processed.replace(/\s+/g, ' ').trim();
    }

    /**
     * Stop speaking
     * @returns {boolean} - Success status; false when speech synthesis is unavailable
     */
    stop() {
        if (window.speechSynthesis) {
            window.speechSynthesis.cancel();
            this.isSpeaking = false;
            this.currentUtterance = null;
            return true;
        }
        return false;
    }

    /**
     * Get available voices
     *
     * Returns the voices whose primary language matches the current locale,
     * or all voices when none match.
     *
     * @returns {Promise<Array>} - Array of `{ id, name, lang, gender }` objects
     */
    async getVoices() {
        if (!this.isReady) {
            return [];
        }

        const localization = this.getModule('localization');
        const currentLocale = (localization && localization.getLocale()) || 'en-us';
        const languageCode = primaryLanguage(currentLocale);

        const describeVoice = (voice) => ({
            id: voice.voiceURI,
            name: voice.name,
            lang: voice.lang,
            gender: this.inferVoiceGender(voice.name)
        });

        // Filter voices by current locale
        const matchingVoices = this.voices.filter(voice => primaryLanguage(voice.lang) === languageCode);

        // If no matching voices found, return all voices
        return (matchingVoices.length > 0 ? matchingVoices : this.voices).map(describeVoice);
    }

    /**
     * Infer voice gender from name
     *
     * Terms are matched against whole words of the name, so "Female" is not
     * mistaken for 'male' and "German" or "Siri" do not count as male.
     *
     * @param {string} name - Voice name
     * @returns {string} - Inferred gender ('male', 'female', or 'unknown')
     */
    inferVoiceGender(name) {
        const words = new Set(String(name || '').toLowerCase().split(/[^a-z]+/).filter(Boolean));

        if (MALE_VOICE_TERMS.some(term => words.has(term))) {
            return 'male';
        }

        if (FEMALE_VOICE_TERMS.some(term => words.has(term))) {
            return 'female';
        }

        return 'unknown';
    }
}
|
||||
|
||||
// Self-registration: create one BrowserTTSModule and hand it to the registry.
// Module registry MUST be accessed via window, not direct import
if (!window.moduleRegistry) {
    console.error('Module registry not available when attempting to register Browser TTS Module');
} else {
    try {
        // The instance is constructed before register() is called, so a
        // failing constructor is reported by the same catch block.
        const moduleInstance = new BrowserTTSModule();
        window.moduleRegistry.register(moduleInstance);
        console.log('Browser TTS Module registered successfully');
    } catch (err) {
        console.error('Failed to register Browser TTS Module:', err);
    }
}
|
||||
@@ -0,0 +1,270 @@
|
||||
/**
|
||||
* ElevenLabsTTSModule
|
||||
* Provides TTS via ElevenLabs API
|
||||
*/
|
||||
import { ApiTTSModuleBase } from './api-tts-module-base.js';
|
||||
|
||||
export class ElevenLabsTTSModule extends ApiTTSModuleBase {
|
||||
constructor() {
|
||||
super('elevenlabs', 'ElevenLabs TTS');
|
||||
|
||||
// Voice options specific to ElevenLabs
|
||||
this.voiceOptions = {
|
||||
voice: 'pNInz6obpgDQGcFmaJgB', // Default voice ID for ElevenLabs
|
||||
model: 'eleven_multilingual_v2', // Use the multilingual model
|
||||
speed: 1.0
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the ElevenLabs TTS module
|
||||
* @returns {Promise<boolean>} - Resolves with success status
|
||||
*/
|
||||
async initialize() {
|
||||
try {
|
||||
this.reportProgress(10, 'Initializing ElevenLabs TTS');
|
||||
|
||||
// Initialize parent
|
||||
const parentInit = await super.initialize();
|
||||
if (!parentInit) {
|
||||
console.error('ElevenLabs TTS: Parent initialization failed');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get required dependencies
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (!persistenceManager) {
|
||||
console.error('ElevenLabs TTS: Required dependency persistence-manager not found');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for API key
|
||||
const apiKey = persistenceManager.getPreference('elevenlabs', 'api_key', '');
|
||||
if (!apiKey) {
|
||||
console.error('ElevenLabs TTS: API key not configured');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load voices from ElevenLabs
|
||||
try {
|
||||
this.reportProgress(50, 'Loading ElevenLabs voices');
|
||||
await this.loadVoices(apiKey);
|
||||
} catch (error) {
|
||||
console.error('ElevenLabs TTS: Failed to load voices:', error);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load preferences
|
||||
const preferredVoice = persistenceManager.getPreference('elevenlabs', 'voice', this.voiceOptions.voice);
|
||||
if (preferredVoice) {
|
||||
this.voiceOptions.voice = preferredVoice;
|
||||
}
|
||||
|
||||
const preferredModel = persistenceManager.getPreference('elevenlabs', 'model', this.voiceOptions.model);
|
||||
if (preferredModel) {
|
||||
this.voiceOptions.model = preferredModel;
|
||||
}
|
||||
|
||||
const preferredSpeed = persistenceManager.getPreference('elevenlabs', 'speed', this.voiceOptions.speed);
|
||||
if (typeof preferredSpeed === 'number') {
|
||||
this.voiceOptions.speed = preferredSpeed;
|
||||
}
|
||||
|
||||
this.isReady = true;
|
||||
this.reportProgress(100, 'ElevenLabs TTS initialized');
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('ElevenLabs TTS: Initialization error:', error);
|
||||
this.isReady = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default API base URL for ElevenLabs
|
||||
* @returns {string} - Default API base URL
|
||||
*/
|
||||
getDefaultApiBaseUrl() {
|
||||
return 'https://api.elevenlabs.io/v1';
|
||||
}
|
||||
|
||||
/**
|
||||
* Load available voices from ElevenLabs API
|
||||
* @param {string} apiKey - API key for authentication
|
||||
* @returns {Promise<boolean>} - Resolves with success status
|
||||
*/
|
||||
async loadVoices(apiKey) {
|
||||
// Set default voices that will be used if API call fails
|
||||
this.voices = [
|
||||
{ id: 'pNInz6obpgDQGcFmaJgB', name: 'Rachel', language: 'en' },
|
||||
{ id: '21m00Tcm4TlvDq8ikWAM', name: 'Adam', language: 'en' },
|
||||
{ id: 'AZnzlk1XvdvUeBnXmlld', name: 'Antoni', language: 'en' },
|
||||
{ id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
|
||||
{ id: 'ErXwobaYiN019PkySvjV', name: 'Daniel', language: 'en' }
|
||||
];
|
||||
|
||||
// Only load from API if we have an API key
|
||||
if (!apiKey) {
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(`${this.apiBaseUrl}/voices`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'xi-api-key': apiKey,
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
console.error(`ElevenLabs TTS: API error: ${response.status} ${response.statusText}`);
|
||||
return true; // Use defaults, but don't fail initialization
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data && data.voices && Array.isArray(data.voices)) {
|
||||
// Transform API response to our internal format
|
||||
this.voices = data.voices.map(voice => ({
|
||||
id: voice.voice_id,
|
||||
name: voice.name,
|
||||
language: 'en', // ElevenLabs doesn't provide language info
|
||||
preview: voice.preview_url
|
||||
}));
|
||||
|
||||
return true;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('ElevenLabs TTS: Error loading voices:', error);
|
||||
}
|
||||
|
||||
// If API call failed, we still return true since we have default voices
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select a voice for the given locale
|
||||
* @param {string} locale - Locale code
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
selectVoiceForLocale(locale) {
|
||||
if (!this.voices || this.voices.length === 0) {
|
||||
return this.selectDefaultVoice();
|
||||
}
|
||||
|
||||
// ElevenLabs doesn't provide language info for voices
|
||||
// Simply use the first voice as default
|
||||
return this.selectDefaultVoice();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate speech audio data using ElevenLabs API
|
||||
* @param {string} text - Text to generate speech for
|
||||
* @returns {Promise<Object>} - Audio data object
|
||||
*/
|
||||
async generateSpeechAudio(text) {
|
||||
// Don't attempt to call the API if no API key is set or text is empty
|
||||
if (!text || !this.apiKey) {
|
||||
return { success: false, reason: 'missing_api_key_or_text' };
|
||||
}
|
||||
|
||||
try {
|
||||
// Process the text
|
||||
const processedText = this.preprocessText(text);
|
||||
|
||||
// Create request payload
|
||||
const payload = {
|
||||
text: processedText,
|
||||
model_id: this.voiceOptions.model || 'eleven_multilingual_v2',
|
||||
voice_settings: {
|
||||
stability: 0.5,
|
||||
similarity_boost: 0.75,
|
||||
style: 0.0,
|
||||
use_speaker_boost: true,
|
||||
speed: this.voiceOptions.speed || 1.0
|
||||
}
|
||||
};
|
||||
|
||||
// Make API request
|
||||
const response = await fetch(`${this.apiBaseUrl}/text-to-speech/${this.voiceOptions.voice}?optimize_streaming_latency=0`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'xi-api-key': this.apiKey,
|
||||
'Accept': 'audio/wav'
|
||||
},
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`API error: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
|
||||
// Get audio blob from response
|
||||
const audioBlob = await response.blob();
|
||||
|
||||
// Convert to array buffer for consistency with other modules
|
||||
const arrayBuffer = await audioBlob.arrayBuffer();
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioData: arrayBuffer
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('ElevenLabs TTS: Error generating speech:', error);
|
||||
return {
|
||||
success: false,
|
||||
reason: 'api_error',
|
||||
error: error.message
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set voice options
|
||||
* @param {Object} options - Voice options
|
||||
*/
|
||||
setVoiceOptions(options = {}) {
|
||||
// Call parent method for common options
|
||||
if (options.voice) {
|
||||
this.voiceOptions.voice = options.voice;
|
||||
|
||||
// Save voice preference
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager) {
|
||||
persistenceManager.updatePreference('tts', 'elevenlabs_voice', options.voice);
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof options.speed === 'number') {
|
||||
this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
|
||||
}
|
||||
|
||||
// Handle ElevenLabs-specific options
|
||||
if (options.model) {
|
||||
this.voiceOptions.model = options.model;
|
||||
|
||||
// Save model preference
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager) {
|
||||
persistenceManager.updatePreference('tts', 'elevenlabs_model', options.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Self-registration on load. The registry is reached through `window`
// (it MUST NOT be imported directly); a missing registry or a failing
// register() call is logged rather than thrown.
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register ElevenLabs TTS Module');
} else {
  try {
    // Build the instance before handing it to the registry
    const moduleInstance = new ElevenLabsTTSModule();
    window.moduleRegistry.register(moduleInstance);
    console.log('ElevenLabs TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register ElevenLabs TTS Module:', registrationError);
  }
}
|
||||
@@ -425,7 +425,7 @@ export class KokoroHandler extends TTSHandler {
|
||||
try {
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager) {
|
||||
persistenceManager.setPreference('tts-voice-kokoro', foundVoice.id);
|
||||
persistenceManager.updatePreference('tts-voice-kokoro', foundVoice.id);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Kokoro TTS: Error saving voice preference:', error);
|
||||
|
||||
@@ -0,0 +1,657 @@
|
||||
/**
|
||||
* KokoroTTSModule for AI Interactive Fiction
|
||||
* Implementation using the Kokoro library
|
||||
*/
|
||||
import { TTSHandlerModule } from './tts-handler-module.js';
|
||||
|
||||
export class KokoroTTSModule extends TTSHandlerModule {
|
||||
constructor() {
|
||||
super('kokoro', 'Kokoro TTS');
|
||||
|
||||
// State
|
||||
this.iframe = null;
|
||||
this.currentAudio = null;
|
||||
this.pendingGenerations = new Map();
|
||||
this.generationCounter = 0;
|
||||
this.voices = [];
|
||||
this.lastProgressTime = null;
|
||||
this.lastProgressValue = null;
|
||||
this.modelLoaded = false;
|
||||
|
||||
// Bind additional methods beyond those in TTSHandlerModule
|
||||
this.bindMethods([
|
||||
'handleIframeMessage',
|
||||
'setupVoiceFromPreferences',
|
||||
'generateSpeech',
|
||||
'speakPreloaded',
|
||||
'preprocessText',
|
||||
'pause',
|
||||
'resume',
|
||||
'getDefaultVoices'
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the Kokoro TTS module
|
||||
* @returns {Promise<boolean>} - Resolves with success status
|
||||
*/
|
||||
async initialize() {
|
||||
try {
|
||||
console.log('Kokoro TTS: Initializing');
|
||||
this.state = 'INITIALIZING';
|
||||
|
||||
// Get dependencies
|
||||
this.reportProgress(10, 'Loading dependencies');
|
||||
|
||||
// The persistence manager is required for preferences
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (!persistenceManager) {
|
||||
console.error('Kokoro TTS: Required dependency persistence-manager not found');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to check if the kokoro-js.js resource exists before proceeding
|
||||
try {
|
||||
this.reportProgress(20, 'Checking for Kokoro TTS resources');
|
||||
const response = await fetch('/js/kokoro-js.js', { method: 'HEAD' });
|
||||
if (!response.ok) {
|
||||
console.error(`Kokoro TTS: Required resource kokoro-js.js not found (${response.status})`);
|
||||
throw new Error('Kokoro TTS resource not available');
|
||||
}
|
||||
console.log('Kokoro TTS: Resources available');
|
||||
} catch (resourceError) {
|
||||
console.error('Kokoro TTS: Error checking resources', resourceError);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create iframe for Kokoro TTS
|
||||
this.reportProgress(30, 'Creating Kokoro TTS iframe');
|
||||
console.log('Kokoro TTS: Creating iframe for Kokoro loader');
|
||||
const iframe = document.createElement('iframe');
|
||||
iframe.src = '/kokoro-loader.html';
|
||||
iframe.style.display = 'none';
|
||||
document.body.appendChild(iframe);
|
||||
this.iframe = iframe;
|
||||
|
||||
// Wait for iframe to load
|
||||
try {
|
||||
await new Promise((resolve, reject) => {
|
||||
iframe.onload = () => {
|
||||
console.log('Kokoro TTS: Iframe loaded successfully');
|
||||
resolve();
|
||||
};
|
||||
|
||||
iframe.onerror = (error) => {
|
||||
console.error('Kokoro TTS: Iframe failed to load:', error);
|
||||
reject(new Error('Kokoro TTS: Iframe failed to load'));
|
||||
};
|
||||
|
||||
iframe.onabort = () => {
|
||||
console.error('Kokoro TTS: Iframe load aborted');
|
||||
reject(new Error('Kokoro TTS: Iframe load aborted'));
|
||||
};
|
||||
});
|
||||
} catch (iframeError) {
|
||||
console.error('Kokoro TTS: Error loading iframe:', iframeError);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add message event listener for progress updates from iframe
|
||||
window.addEventListener('message', this.handleIframeMessage);
|
||||
|
||||
// Wait for model to initialize
|
||||
try {
|
||||
this.reportProgress(50, 'Loading Kokoro model');
|
||||
console.log('Kokoro TTS: Waiting for model to initialize');
|
||||
|
||||
await new Promise((resolve, reject) => {
|
||||
// Create one-time handler for kokoro:ready message
|
||||
const readyHandler = (event) => {
|
||||
if (event.data && event.data.type === 'kokoro:ready') {
|
||||
window.removeEventListener('message', readyHandler);
|
||||
|
||||
// Validate the success status from the event
|
||||
if (event.data.success === false) {
|
||||
console.error('Kokoro TTS: Model initialization failed:', event.data.error || 'Unknown error');
|
||||
reject(new Error('Kokoro TTS: ' + (event.data.error || 'Model initialization failed')));
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('Kokoro TTS: Model initialized successfully');
|
||||
this.modelLoaded = true;
|
||||
this.voices = event.data.voices || this.getDefaultVoices();
|
||||
resolve();
|
||||
}
|
||||
};
|
||||
|
||||
window.addEventListener('message', readyHandler);
|
||||
|
||||
// Send initialization message to iframe
|
||||
this.reportProgress(60, 'Initializing Kokoro model');
|
||||
console.log('Kokoro TTS: Sending initialization message to iframe');
|
||||
iframe.contentWindow.postMessage({ type: 'kokoro:initialize' }, '*');
|
||||
});
|
||||
} catch (modelError) {
|
||||
console.error('Kokoro TTS: Error initializing model:', modelError);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get default voices
|
||||
this.reportProgress(80, 'Loading Kokoro voices');
|
||||
this.voices = this.getDefaultVoices();
|
||||
console.log('Kokoro TTS: Loaded default voices:', this.voices);
|
||||
|
||||
// Set voice based on preferences
|
||||
this.reportProgress(90, 'Setting up voice preferences');
|
||||
await this.setupVoiceFromPreferences(persistenceManager);
|
||||
console.log('Kokoro TTS: Voice preferences set up');
|
||||
|
||||
this.isReady = true;
|
||||
this.reportProgress(100, 'Kokoro TTS initialized');
|
||||
console.log('Kokoro TTS: Initialization complete');
|
||||
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Kokoro TTS: Initialization error:', error);
|
||||
this.isReady = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handle messages posted by the Kokoro iframe.
 *
 * Messages from any other window, and messages without a `type`, are ignored.
 * Handled types: 'kokoro:progress', 'kokoro:ready', 'kokoro:error',
 * 'kokoro:speech-generated' and 'kokoro:voices'.
 *
 * @param {MessageEvent} event - Message event
 */
handleIframeMessage = (event) => {
  // Only process messages from our iframe
  if (!this.iframe || event.source !== this.iframe.contentWindow) {
    return;
  }

  const message = event.data;
  if (!message || !message.type) {
    return;
  }

  switch (message.type) {
    case 'kokoro:progress': {
      const { progress } = message;
      // A type check instead of truthiness: 0 is a valid progress value and
      // a non-number would make toFixed() throw.
      if (typeof progress === 'number' && Number.isFinite(progress)) {
        // Track the last time we received a progress update
        this.lastProgressTime = Date.now();
        this.lastProgressValue = progress;
        this.modelLoadingProgress = progress;

        // Map model progress onto the 60+ part of the overall progress
        // (presumably progress is 0-100, giving 60-90 - TODO confirm).
        this.reportProgress(60 + Math.floor(progress * 0.3), `Loading Kokoro model: ${progress.toFixed(0)}%`);
      }
      break;
    }

    case 'kokoro:ready':
      // A ready message may carry success === false; do not mark the model
      // as loaded in that case.
      if (message.success === false) {
        console.error('Kokoro TTS: Model ready event reported failure:', message.error || 'Unknown error');
        break;
      }
      this.modelLoaded = true;
      this.reportProgress(90, 'Kokoro model loaded');
      console.log('Kokoro TTS: Model ready event received');
      break;

    case 'kokoro:error':
      console.error('Kokoro TTS: Error from iframe:', message.error);
      this.state = 'ERROR';
      break;

    case 'kokoro:speech-generated':
      // Settle the promise created by generateSpeech() for this request id
      if (message.id !== undefined && this.pendingGenerations.has(message.id)) {
        const resolver = this.pendingGenerations.get(message.id);
        this.pendingGenerations.delete(message.id);

        if (message.error) {
          resolver.reject(new Error(message.error));
        } else {
          resolver.resolve({
            success: true,
            audioData: message.audioData,
            duration: message.duration || 0
          });
        }
      }
      break;

    case 'kokoro:voices':
      // Update available voices and tell listeners about the new list
      if (Array.isArray(message.voices)) {
        this.voices = message.voices;
        document.dispatchEvent(new CustomEvent('tts:voices-updated', {
          detail: { engine: 'kokoro', voices: this.voices }
        }));
      }
      break;
  }
};
|
||||
|
||||
/**
|
||||
* Set up the voice from preferences
|
||||
*/
|
||||
async setupVoiceFromPreferences(persistenceManager) {
|
||||
if (!persistenceManager) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get current locale
|
||||
const localization = this.getModule('localization');
|
||||
const locale = localization ? localization.getLocale() : null;
|
||||
|
||||
// Get preferred voice from preferences
|
||||
const preferredVoiceId = persistenceManager.getPreference('tts', 'kokoro_voice', '');
|
||||
|
||||
// Find matching voice
|
||||
let selectedVoice = null;
|
||||
|
||||
if (preferredVoiceId) {
|
||||
// Try to find the specific voice
|
||||
selectedVoice = this.voices.find(v => v.id === preferredVoiceId);
|
||||
}
|
||||
|
||||
if (!selectedVoice) {
|
||||
// Find a voice for the current locale
|
||||
const normalizedLocale = locale ? locale.toLowerCase().replace('_', '-') : 'en-us';
|
||||
const languageCode = normalizedLocale.split('-')[0];
|
||||
|
||||
// Try to find an exact locale match
|
||||
selectedVoice = this.voices.find(v =>
|
||||
v.lang && v.lang.toLowerCase() === normalizedLocale
|
||||
);
|
||||
|
||||
// If not found, try to find a language match
|
||||
if (!selectedVoice) {
|
||||
selectedVoice = this.voices.find(v =>
|
||||
v.lang && v.lang.toLowerCase().startsWith(languageCode)
|
||||
);
|
||||
}
|
||||
|
||||
// If still not found, use the first voice
|
||||
if (!selectedVoice && this.voices.length > 0) {
|
||||
selectedVoice = this.voices[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Set the voice
|
||||
if (selectedVoice) {
|
||||
this.setVoice(selectedVoice);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set voice for TTS
|
||||
* @param {Object} voice - Voice to set
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
setVoice(voice) {
|
||||
if (!voice || !voice.id) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.currentVoice = voice;
|
||||
|
||||
// Save to preferences
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager) {
|
||||
persistenceManager.updatePreference('tts', 'kokoro_voice', voice.id);
|
||||
}
|
||||
|
||||
// Send message to iframe
|
||||
if (this.iframe && this.iframe.contentWindow) {
|
||||
this.iframe.contentWindow.postMessage({
|
||||
type: 'kokoro:set-voice',
|
||||
voiceId: voice.id
|
||||
}, '*');
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set options for TTS
|
||||
* @param {Object} options - Options to set
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
setOptions(options) {
|
||||
if (!options) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update rate and volume if provided
|
||||
if (options.rate !== undefined) {
|
||||
this.options.rate = options.rate;
|
||||
}
|
||||
|
||||
if (options.volume !== undefined) {
|
||||
this.options.volume = options.volume;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get available voices
|
||||
* @returns {Array} - Array of voice objects
|
||||
*/
|
||||
async getVoices() {
|
||||
// If no voices are loaded yet, return default voices
|
||||
if (!this.voices || this.voices.length === 0) {
|
||||
return this.getDefaultVoices();
|
||||
}
|
||||
|
||||
return this.voices;
|
||||
}
|
||||
|
||||
/**
|
||||
* Preprocess text for TTS
|
||||
* @param {string} text - Text to preprocess
|
||||
* @returns {string} - Preprocessed text
|
||||
*/
|
||||
preprocessText(text) {
|
||||
// Remove HTML tags
|
||||
text = text.replace(/<[^>]*>/g, ' ');
|
||||
|
||||
// Replace special characters
|
||||
text = text.replace(/&/g, ' and ');
|
||||
|
||||
// Normalize whitespace
|
||||
text = text.replace(/\s+/g, ' ').trim();
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Preload speech for later playback
|
||||
* @param {string} text - Text to preload
|
||||
* @returns {Promise<Object>} - Resolves with preloaded audio data
|
||||
*/
|
||||
async preloadSpeech(text) {
|
||||
if (!this.isReady) {
|
||||
return { success: false, reason: 'not_ready' };
|
||||
}
|
||||
|
||||
// Generate speech audio data
|
||||
const result = await this.generateSpeech(text);
|
||||
|
||||
if (!result.success) {
|
||||
return { success: false, reason: 'generation_failed' };
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioData: result.audioData,
|
||||
text,
|
||||
duration: result.duration || 0
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak text using preloaded audio
|
||||
* @param {Object} preloadData - Preloaded audio data
|
||||
* @param {Function} callback - Callback for when speech completes
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
speakPreloaded(preloadData, callback = null) {
|
||||
if (!this.isReady || !preloadData || !preloadData.audioData) {
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'invalid_data' });
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Stop any ongoing speech
|
||||
this.stop();
|
||||
|
||||
// Create audio from blob
|
||||
const audioBlob = new Blob([preloadData.audioData], { type: 'audio/mp3' });
|
||||
const audioUrl = URL.createObjectURL(audioBlob);
|
||||
|
||||
const audio = new Audio(audioUrl);
|
||||
audio.volume = this.options.volume;
|
||||
audio.playbackRate = this.options.rate;
|
||||
|
||||
// Set up event handlers
|
||||
audio.onended = () => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: true });
|
||||
}
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
};
|
||||
|
||||
audio.onerror = (error) => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'playback_error', error });
|
||||
}
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
};
|
||||
|
||||
// Start playback
|
||||
this.currentAudio = audio;
|
||||
this.isSpeaking = true;
|
||||
audio.play().catch(error => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'playback_error', error });
|
||||
}
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak text
|
||||
* @param {string} text - Text to speak
|
||||
* @param {Function} callback - Callback for when speech completes
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
speak(text, callback = null) {
|
||||
if (!this.isReady) {
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'not_ready' });
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Preprocess text
|
||||
const processedText = this.preprocessText(text);
|
||||
|
||||
// Generate and play speech
|
||||
this.generateSpeech(processedText).then(result => {
|
||||
if (result.success && result.audioData) {
|
||||
// Create audio blob and URL
|
||||
const audioBlob = new Blob([result.audioData], { type: 'audio/mp3' });
|
||||
const audioUrl = URL.createObjectURL(audioBlob);
|
||||
|
||||
// Stop any ongoing speech
|
||||
this.stop();
|
||||
|
||||
// Create and play audio
|
||||
const audio = new Audio(audioUrl);
|
||||
audio.volume = this.options.volume;
|
||||
audio.playbackRate = this.options.rate;
|
||||
|
||||
// Set up event handlers
|
||||
audio.onended = () => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: true });
|
||||
}
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
};
|
||||
|
||||
audio.onerror = (error) => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'playback_error', error });
|
||||
}
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
};
|
||||
|
||||
// Start playback
|
||||
this.currentAudio = audio;
|
||||
this.isSpeaking = true;
|
||||
audio.play().catch(error => {
|
||||
this.isSpeaking = false;
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'playback_error', error });
|
||||
}
|
||||
});
|
||||
} else {
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'generation_failed' });
|
||||
}
|
||||
}
|
||||
}).catch(error => {
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'generation_error', error });
|
||||
}
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate speech using the iframe
|
||||
* @param {string} text - Text to generate speech for
|
||||
* @returns {Promise<Object>} - Resolves with audio data
|
||||
*/
|
||||
async generateSpeech(text) {
|
||||
if (!this.isReady || !this.iframe || !this.iframe.contentWindow) {
|
||||
return { success: false, reason: 'not_ready' };
|
||||
}
|
||||
|
||||
// Process text
|
||||
const processedText = this.preprocessText(text);
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
// Generate unique ID for this request
|
||||
const id = this.generationCounter++;
|
||||
|
||||
// Store resolver functions
|
||||
this.pendingGenerations.set(id, { resolve, reject });
|
||||
|
||||
// Send request to iframe
|
||||
this.iframe.contentWindow.postMessage({
|
||||
type: 'kokoro:generate-speech',
|
||||
text: processedText,
|
||||
id,
|
||||
voiceId: this.currentVoice ? this.currentVoice.id : null
|
||||
}, '*');
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop current speech
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
stop() {
|
||||
if (this.currentAudio) {
|
||||
try {
|
||||
this.currentAudio.pause();
|
||||
this.currentAudio.currentTime = 0;
|
||||
this.currentAudio = null;
|
||||
this.isSpeaking = false;
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Kokoro TTS: Error stopping speech:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pause current speech
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
pause() {
|
||||
if (this.currentAudio) {
|
||||
try {
|
||||
this.currentAudio.pause();
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Kokoro TTS: Error pausing speech:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resume current speech
|
||||
* @returns {boolean} - Success status
|
||||
*/
|
||||
resume() {
|
||||
if (this.currentAudio) {
|
||||
try {
|
||||
this.currentAudio.play();
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Kokoro TTS: Error resuming speech:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default voices for current locale
|
||||
* @returns {Array} Default voices
|
||||
*/
|
||||
getDefaultVoices() {
|
||||
return [
|
||||
// American Female voices
|
||||
{ id: 'af_heart', name: 'Heart', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_daisy', name: 'Daisy', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_soft', name: 'Soft', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_glados', name: 'GLaDOS', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_southern_belle', name: 'Southern Belle', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_dramatic', name: 'Dramatic', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_valley_girl', name: 'Valley Girl', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_british', name: 'British', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_russian', name: 'Russian', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_german', name: 'German', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_cheeky_cute', name: 'Cheeky Cute', lang: 'en-US', gender: 'female' },
|
||||
|
||||
// American Male voices
|
||||
{ id: 'am_bruce', name: 'Bruce', lang: 'en-US', gender: 'male' },
|
||||
{ id: 'am_announcer', name: 'Announcer', lang: 'en-US', gender: 'male' },
|
||||
{ id: 'am_radio_host', name: 'Radio Host', lang: 'en-US', gender: 'male' },
|
||||
|
||||
// British Female voices
|
||||
{ id: 'bf_charlotte', name: 'Charlotte', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_elizabeth', name: 'Elizabeth', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_lily', name: 'Lily', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_olivia', name: 'Olivia', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_victoria', name: 'Victoria', lang: 'en-GB', gender: 'female' },
|
||||
|
||||
// British Male voices
|
||||
{ id: 'bm_william', name: 'William', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_arthur', name: 'Arthur', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_george', name: 'George', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_harry', name: 'Harry', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_jack', name: 'Jack', lang: 'en-GB', gender: 'male' }
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Self-registration on load. The registry is reached through `window`
// (it MUST NOT be imported directly); a missing registry or a failing
// register() call is logged rather than thrown.
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register Kokoro TTS Module');
} else {
  try {
    // Build the instance before handing it to the registry
    const moduleInstance = new KokoroTTSModule();
    window.moduleRegistry.register(moduleInstance);
    console.log('Kokoro TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register Kokoro TTS Module:', registrationError);
  }
}
|
||||
@@ -96,6 +96,12 @@ const ModuleLoader = (function() {
|
||||
*/
|
||||
async function loadModuleScripts() {
|
||||
|
||||
// Define dependency scripts that need to be loaded first but aren't modules themselves
|
||||
const dependenciesToLoad = [
|
||||
{ script: '/js/api-tts-module-base.js' }, // Abstract base class, not a module
|
||||
{ script: '/js/tts-handler-module.js' } // Abstract base class for TTS handlers, not a module
|
||||
];
|
||||
|
||||
// Define modules with their weights
|
||||
const modulesToLoad = [
|
||||
// Core functionality modules
|
||||
@@ -108,6 +114,10 @@ const ModuleLoader = (function() {
|
||||
|
||||
// Audio and TTS modules
|
||||
{ id: 'audio-manager', script: '/js/audio-manager.js', weight: 60 },
|
||||
{ id: 'kokoro', script: '/js/kokoro-tts-module.js', weight: 65 },
|
||||
{ id: 'browser', script: '/js/browser-tts-module.js', weight: 65 },
|
||||
{ id: 'elevenlabs', script: '/js/elevenlabs-tts-module.js', weight: 65 },
|
||||
{ id: 'openai', script: '/js/openai-tts-module.js', weight: 65 },
|
||||
{ id: 'tts-factory', script: '/js/tts-factory.js', weight: 70 }, // TTSFactory must be loaded before TTSPlayer
|
||||
{ id: 'tts', script: '/js/tts-player.js', weight: 75 },
|
||||
|
||||
@@ -134,6 +144,10 @@ const ModuleLoader = (function() {
|
||||
createModuleListItem(module.id, getModuleNameFromId(module.id));
|
||||
});
|
||||
|
||||
// Load dependencies first
|
||||
const loadDependencies = dependenciesToLoad.map(dependency => loadScript(dependency.script));
|
||||
await Promise.all(loadDependencies);
|
||||
|
||||
// Load each module script
|
||||
const loadPromises = modulesToLoad.map(module => loadScript(module.script));
|
||||
return Promise.all(loadPromises);
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
/**
|
||||
* OpenAITTSModule
|
||||
* Provides TTS via OpenAI API
|
||||
*/
|
||||
import { ApiTTSModuleBase } from './api-tts-module-base.js';
|
||||
|
||||
/**
 * OpenAI text-to-speech handler.
 *
 * Sends text to the `/audio/speech` endpoint under the configured API base URL
 * and returns the generated audio as an ArrayBuffer.
 *
 * Preference storage: all settings are written to the `tts` category under
 * `openai_*` keys (`openai_voice`, `openai_model`, `openai_format`,
 * `openai_speed`). `initialize()` reads those same keys back and only falls
 * back to the legacy `openai` category (`voice`, `model`, `speed`, `api_key`)
 * when nothing is stored under the `tts` category.
 */
export class OpenAITTSModule extends ApiTTSModuleBase {
  constructor() {
    super('openai', 'OpenAI TTS');

    // Voice options specific to OpenAI
    this.voiceOptions = {
      voice: 'alloy', // Default voice for OpenAI
      model: 'tts-1', // Standard model
      speed: 1.0,
      response_format: 'mp3' // One of the formats accepted by setVoiceOptions()
    };

    // Predefined voices - OpenAI has a fixed set
    this.voices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' }
    ];
  }

  /**
   * Get the default API base URL for OpenAI
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.openai.com/v1';
  }

  /**
   * Initialize the module: run the parent initialization, resolve the API key
   * and restore the saved voice, model, speed and response format.
   * @returns {Promise<boolean>} - Resolves with success status; false when the
   *   parent fails, the persistence manager is missing, no API key is
   *   configured, or an unexpected error is thrown
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing OpenAI TTS');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('OpenAI TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('OpenAI TTS: Required dependency persistence-manager not found');
        return false;
      }

      // Read a setting from where setVoiceOptions() writes it ('tts' /
      // 'openai_<key>'); fall back to the legacy 'openai' category so values
      // stored by earlier versions are still honoured.
      const readPreference = (key, legacyKey, fallback) => {
        const saved = persistenceManager.getPreference('tts', `openai_${key}`);
        if (saved !== undefined && saved !== null && saved !== '') {
          return saved;
        }
        return legacyKey
          ? persistenceManager.getPreference('openai', legacyKey, fallback)
          : fallback;
      };

      // The parent initialization already loads the key from
      // ('tts', 'openai_api_key'); only consult the legacy location when that
      // came back empty.
      const apiKey = this.apiKey || persistenceManager.getPreference('openai', 'api_key', '');
      if (!apiKey) {
        console.error('OpenAI TTS: API key not configured');
        return false;
      }
      this.apiKey = apiKey;

      // Load preferences
      const preferredVoice = readPreference('voice', 'voice', this.voiceOptions.voice);
      if (preferredVoice) {
        this.voiceOptions.voice = preferredVoice;
      }

      const preferredModel = readPreference('model', 'model', this.voiceOptions.model);
      if (preferredModel) {
        this.voiceOptions.model = preferredModel;
      }

      const preferredSpeed = readPreference('speed', 'speed', this.voiceOptions.speed);
      if (Number.isFinite(preferredSpeed)) {
        this.voiceOptions.speed = preferredSpeed;
      }

      const preferredFormat = readPreference('format', null, this.voiceOptions.response_format);
      if (this.isSupportedResponseFormat(preferredFormat)) {
        this.voiceOptions.response_format = preferredFormat;
      }

      // Setup available voices.
      // NOTE(review): getAvailableVoices() is not defined in this class; it is
      // presumably inherited - confirm. The predefined list from the
      // constructor is kept unless the call yields a non-empty array.
      if (typeof this.getAvailableVoices === 'function') {
        const available = this.getAvailableVoices();
        if (Array.isArray(available) && available.length > 0) {
          this.voices = available;
        }
      }

      this.isReady = true;
      this.reportProgress(100, 'OpenAI TTS initialized');
      return true;
    } catch (error) {
      console.error('OpenAI TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Check whether a response format is one this module accepts.
   * @param {*} format - Candidate format value
   * @returns {boolean} - True for 'mp3', 'opus', 'aac' or 'flac'
   */
  isSupportedResponseFormat(format) {
    return ['mp3', 'opus', 'aac', 'flac'].includes(format);
  }

  /**
   * Load available voices
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async loadVoices() {
    // OpenAI has a fixed set of voices, no need to fetch them
    return true;
  }

  /**
   * Select a voice for the given locale
   * @param {string} locale - Locale code (currently not used for selection)
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    // Every predefined voice is tagged language 'en', so there is nothing to
    // choose between per locale: always use the default voice.
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice
   * @returns {boolean} - Success status
   */
  selectDefaultVoice() {
    this.voiceOptions.voice = 'alloy';
    return true;
  }

  /**
   * Generate speech audio data using OpenAI API
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` on
   *   success; `{ success: false, reason, error? }` when the text or API key
   *   is missing or the request fails
   */
  async generateSpeechAudio(text) {
    if (!text || !this.apiKey) {
      return {
        success: false,
        reason: 'missing_api_key_or_text'
      };
    }

    try {
      // Process the text
      const processedText = this.preprocessText(text);

      // Create request payload
      const payload = {
        model: this.voiceOptions.model || 'tts-1',
        input: processedText,
        voice: this.voiceOptions.voice || 'alloy',
        response_format: this.voiceOptions.response_format || 'mp3',
        speed: this.voiceOptions.speed || 1.0
      };

      // Make API request
      const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      // Return an array buffer for consistency with other modules
      const arrayBuffer = await response.arrayBuffer();

      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('OpenAI TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options and persist each accepted value under the `tts`
   * category so initialize() can restore it.
   * @param {Object} options - Voice options
   * @param {string} [options.voice] - Voice id
   * @param {number} [options.speed] - Speed; clamped to the range 0.5 - 2.0
   * @param {string} [options.model] - Model id
   * @param {string} [options.response_format] - 'mp3', 'opus', 'aac' or 'flac';
   *   any other value is ignored
   */
  setVoiceOptions(options = {}) {
    const persistenceManager = this.getModule('persistence-manager');
    const persist = (key, value) => {
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', key, value);
      }
    };

    // Handle common options
    if (options.voice) {
      this.voiceOptions.voice = options.voice;
      persist('openai_voice', options.voice);
    }

    // Number.isFinite also rejects NaN, which would otherwise survive clamping
    if (Number.isFinite(options.speed)) {
      this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
      persist('openai_speed', this.voiceOptions.speed);
    }

    // Handle OpenAI-specific options
    if (options.model) {
      this.voiceOptions.model = options.model;
      persist('openai_model', options.model);
    }

    if (options.response_format && this.isSupportedResponseFormat(options.response_format)) {
      this.voiceOptions.response_format = options.response_format;
      persist('openai_format', options.response_format);
    }
  }
}
|
||||
|
||||
// Register the module with the module registry
|
||||
// Module registry MUST be accessed via window, not direct import
|
||||
// Self-registration: runs once when this script is evaluated. The registry is
// looked up on `window`; if it is absent the module is not instantiated and an
// error is logged instead.
{
  const registry = window.moduleRegistry;

  if (!registry) {
    console.error('Module registry not available when attempting to register OpenAI TTS Module');
  } else {
    try {
      // Build the instance, then hand it to the registry
      registry.register(new OpenAITTSModule());
      console.log('OpenAI TTS Module registered successfully');
    } catch (err) {
      console.error('Failed to register OpenAI TTS Module:', err);
    }
  }
}
|
||||
+36
-8
@@ -632,13 +632,19 @@ class OptionsUIModule extends BaseModule {
|
||||
* Show the options modal
|
||||
*/
|
||||
show() {
|
||||
if (!this.modal) return;
|
||||
|
||||
// Reload preferences before showing
|
||||
this.loadPreferences();
|
||||
|
||||
// Show modal
|
||||
this.modal.style.display = 'flex';
|
||||
if (this.modal) {
|
||||
this.modal.style.display = 'flex';
|
||||
|
||||
// Refresh TTS dropdown
|
||||
this.populateTtsSystems();
|
||||
|
||||
// Make sure the UI reflects the current voice
|
||||
this.populateVoices();
|
||||
|
||||
// Update API settings visibility based on the current selection
|
||||
this.updateApiSettingsVisibility();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -669,6 +675,10 @@ class OptionsUIModule extends BaseModule {
|
||||
const ttsFactory = this.getModule('tts-factory');
|
||||
if (!ttsFactory) return;
|
||||
|
||||
// Debug TTS handlers to see what's happening
|
||||
console.log('Options UI: Debugging TTS handlers before populating dropdown');
|
||||
ttsFactory.debugTTSHandlers();
|
||||
|
||||
// Clear existing options
|
||||
this.elements.ttsSystem.innerHTML = '';
|
||||
|
||||
@@ -1123,9 +1133,27 @@ class OptionsUIModule extends BaseModule {
|
||||
|
||||
const available = event.detail?.available || false;
|
||||
|
||||
// Update the TTS options visibility
|
||||
// DON'T hide the TTS section completely, as this prevents configuring API keys
|
||||
// Instead, just mark it visually (we'll keep controls accessible)
|
||||
if (this.elements.ttsSection) {
|
||||
this.elements.ttsSection.style.display = available ? 'block' : 'none';
|
||||
// Set a visual indicator that TTS is not working, but keep it visible
|
||||
this.elements.ttsSection.classList.toggle('tts-unavailable', !available);
|
||||
// Add status message if not available
|
||||
if (!available && !this.elements.ttsUnavailableMessage) {
|
||||
const statusDiv = document.createElement('div');
|
||||
statusDiv.className = 'tts-status-message';
|
||||
statusDiv.innerHTML = '<strong>TTS Unavailable</strong>: Check logs for details. You can still configure API keys below.';
|
||||
statusDiv.style.color = '#ca3c3c';
|
||||
statusDiv.style.padding = '5px 0';
|
||||
statusDiv.style.marginBottom = '10px';
|
||||
this.elements.ttsUnavailableMessage = statusDiv;
|
||||
// Insert at the top of the TTS section
|
||||
this.elements.ttsSection.insertBefore(statusDiv, this.elements.ttsSection.firstChild);
|
||||
} else if (available && this.elements.ttsUnavailableMessage) {
|
||||
// Remove the message if TTS becomes available
|
||||
this.elements.ttsUnavailableMessage.remove();
|
||||
this.elements.ttsUnavailableMessage = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Update the TTS system dropdown
|
||||
|
||||
+862
-486
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,202 @@
|
||||
/**
|
||||
* TTSHandlerModule Base Class
|
||||
* Base class for all TTS handler modules
|
||||
*/
|
||||
import { BaseModule } from './base-module.js';
|
||||
|
||||
/**
 * Base class for all TTS handler modules.
 *
 * Holds the state shared by every handler (readiness, speaking flag, voice
 * list, rate/pitch/volume) and provides default implementations that
 * subclasses override: setVoice(), speak(), stop() and generateSpeech() all
 * report failure here.
 */
export class TTSHandlerModule extends BaseModule {
  /**
   * @param {string} id - Handler identifier, also used in preference keys
   * @param {string} name - Human-readable name used in log messages
   */
  constructor(id, name) {
    super(id, name);

    // Common TTS handler properties
    this.isReady = false;
    this.isSpeaking = false;
    this.currentUtterance = null;
    this.voices = [];
    this.currentVoice = null;
    this.defaultVoice = null;
    this.speechRate = 1.0;
    this.pitch = 1.0;
    this.volume = 1.0;

    // Common dependencies for TTS handlers
    this.dependencies = ['persistence-manager', 'localization'];

    // Bind common methods
    this.bindMethods([
      'speak',
      'stop',
      'getVoices',
      'setVoice',
      'configure',
      'generateSpeech'
    ]);
  }

  /**
   * Get the handler ID
   * @returns {string} - The handler ID
   */
  getId() {
    return this.id;
  }

  /**
   * Initialize the TTS handler: load the common preferences and set up event
   * listeners.
   * @returns {Promise<boolean>} - Resolves with success status; false when the
   *   persistence manager is missing or an error is thrown
   */
  async initialize() {
    try {
      this.reportProgress(20, `Initializing ${this.name}`);

      // Check for required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error(`${this.name}: Persistence Manager dependency not found`);
        return false;
      }

      // Load preferences
      this.loadPreferences(persistenceManager);

      // Set up event listeners
      this.setupEventListeners();

      return true;
    } catch (error) {
      console.error(`${this.name}: Initialization error`, error);
      return false;
    }
  }

  /**
   * Load preferences from persistence manager
   * @param {Object} persistenceManager - The persistence manager module
   */
  loadPreferences(persistenceManager) {
    // Load common preferences; each falls back to 1.0 when nothing is stored
    this.speechRate = persistenceManager.getPreference('tts', 'rate', 1.0);
    this.pitch = persistenceManager.getPreference('tts', 'pitch', 1.0);
    this.volume = persistenceManager.getPreference('tts', 'volume', 1.0);
  }

  /**
   * Set up common event listeners
   */
  setupEventListeners() {
    // To be implemented by subclasses if needed
  }

  /**
   * Check if the handler is ready
   * @returns {boolean} - Whether the handler is ready
   */
  isHandlerReady() {
    return this.isReady;
  }

  /**
   * Check if the handler is currently speaking
   * @returns {boolean} - Whether the handler is speaking
   */
  isSpeakingNow() {
    return this.isSpeaking;
  }

  /**
   * Get available voices
   * @returns {Promise<Array>} - Resolves with array of voice objects
   */
  async getVoices() {
    return this.voices;
  }

  /**
   * Set the voice to use
   * @param {string} voiceId - Voice identifier
   * @returns {boolean} - Success status (always false in this base class)
   */
  setVoice(voiceId) {
    // To be implemented by subclasses
    return false;
  }

  /**
   * Configure TTS parameters and persist them when anything changed.
   * @param {Object} [options] - Configuration options
   * @param {string} [options.voice] - Voice identifier, forwarded to setVoice()
   * @param {number} [options.speed] - Speech rate
   * @param {number} [options.pitch] - Pitch; 0 is accepted
   * @param {number} [options.volume] - Volume; 0 (mute) is accepted
   * @returns {boolean} - True when at least one setting differed from the
   *   current value
   */
  configure(options = {}) {
    let changed = false;

    if (options.voice && options.voice !== this.currentVoice) {
      this.setVoice(options.voice);
      changed = true;
    }

    // A rate of 0 is still ignored, as before.
    if (options.speed && options.speed !== this.speechRate) {
      this.speechRate = options.speed;
      changed = true;
    }

    // Pitch and volume use a presence check rather than truthiness so that an
    // explicit 0 is applied instead of being silently dropped.
    const isProvided = (value) => value !== undefined && value !== null;

    if (isProvided(options.pitch) && options.pitch !== this.pitch) {
      this.pitch = options.pitch;
      changed = true;
    }

    if (isProvided(options.volume) && options.volume !== this.volume) {
      this.volume = options.volume;
      changed = true;
    }

    // Save preferences if changed
    if (changed) {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', 'rate', this.speechRate);
        persistenceManager.updatePreference('tts', 'pitch', this.pitch);
        persistenceManager.updatePreference('tts', 'volume', this.volume);
        if (this.currentVoice) {
          persistenceManager.updatePreference('tts', `${this.id}_voice`, this.currentVoice);
        }
      }
    }

    return changed;
  }

  /**
   * Speak text
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes; in this
   *   base class it is invoked asynchronously with a 'not_implemented' failure
   * @returns {boolean} - Success status (always false in this base class)
   */
  speak(text, callback) {
    // To be implemented by subclasses
    console.error(`${this.name}: speak() method not implemented`);
    if (callback) {
      setTimeout(() => callback({ success: false, reason: 'not_implemented' }), 0);
    }
    return false;
  }

  /**
   * Stop speaking
   * @returns {boolean} - Success status (always false in this base class)
   */
  stop() {
    // To be implemented by subclasses
    return false;
  }

  /**
   * Generate speech audio data
   * @param {string} text - Text to generate speech for
   * @param {Object} options - Generation options
   * @returns {Promise<Object>} - Resolves with audio data; this base class
   *   resolves with a 'not_implemented' failure object
   */
  async generateSpeech(text, options = {}) {
    // To be implemented by subclasses
    return { success: false, reason: 'not_implemented' };
  }
}
|
||||
+36
-101
@@ -59,83 +59,28 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Create a global object to store Kokoro instance
|
||||
// Create a simple loader object to handle the Kokoro instance
|
||||
window.KokoroLoader = {
|
||||
loaded: false,
|
||||
error: null,
|
||||
instance: null,
|
||||
kokoroTTS: null,
|
||||
voices: null,
|
||||
callbacks: [],
|
||||
progress: 0,
|
||||
progressMessage: 'Initializing...',
|
||||
|
||||
// Register a callback for when Kokoro is loaded
|
||||
onLoad: function(callback) {
|
||||
if (this.loaded) {
|
||||
callback(this.instance);
|
||||
} else if (this.error) {
|
||||
callback(null, this.error);
|
||||
} else {
|
||||
this.callbacks.push(callback);
|
||||
}
|
||||
},
|
||||
initialized: false,
|
||||
|
||||
// Update progress
|
||||
updateProgress: function(progress, message) {
|
||||
this.progress = progress;
|
||||
this.progressMessage = message || 'Loading...';
|
||||
const progressPercent = Math.round(progress * 100);
|
||||
document.getElementById('status').textContent = `${this.progressMessage} (${isNaN(progressPercent) ? 0 : progressPercent}%)`;
|
||||
log(`Progress: ${this.progressMessage} (${isNaN(progressPercent) ? 0 : progressPercent}%)`);
|
||||
document.getElementById('status').textContent = `${message} (${progressPercent}%)`;
|
||||
log(`Progress: ${message} (${progressPercent}%)`);
|
||||
|
||||
// Notify parent window
|
||||
if (window.parent !== window) {
|
||||
// Only notify parent if progress is valid
|
||||
if (progress !== undefined && !isNaN(progress) && window.parent !== window) {
|
||||
window.parent.postMessage({
|
||||
type: 'kokoro-progress',
|
||||
progress: isNaN(progress) ? 0 : progress,
|
||||
message: this.progressMessage
|
||||
progress: progress,
|
||||
message: message
|
||||
}, '*');
|
||||
}
|
||||
},
|
||||
|
||||
// Get default voices
|
||||
getDefaultVoices: function() {
|
||||
return [
|
||||
// American Female voices
|
||||
{ id: 'af_heart', name: 'Heart', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_daisy', name: 'Daisy', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_soft', name: 'Soft', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_glados', name: 'GLaDOS', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_southern_belle', name: 'Southern Belle', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_dramatic', name: 'Dramatic', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_valley_girl', name: 'Valley Girl', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_british', name: 'British', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_russian', name: 'Russian', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_german', name: 'German', lang: 'en-US', gender: 'female' },
|
||||
{ id: 'af_cheeky_cute', name: 'Cheeky Cute', lang: 'en-US', gender: 'female' },
|
||||
|
||||
// American Male voices
|
||||
{ id: 'am_bruce', name: 'Bruce', lang: 'en-US', gender: 'male' },
|
||||
{ id: 'am_announcer', name: 'Announcer', lang: 'en-US', gender: 'male' },
|
||||
{ id: 'am_radio_host', name: 'Radio Host', lang: 'en-US', gender: 'male' },
|
||||
|
||||
// British Female voices
|
||||
{ id: 'bf_charlotte', name: 'Charlotte', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_elizabeth', name: 'Elizabeth', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_lily', name: 'Lily', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_olivia', name: 'Olivia', lang: 'en-GB', gender: 'female' },
|
||||
{ id: 'bf_victoria', name: 'Victoria', lang: 'en-GB', gender: 'female' },
|
||||
|
||||
// British Male voices
|
||||
{ id: 'bm_william', name: 'William', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_arthur', name: 'Arthur', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_george', name: 'George', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_harry', name: 'Harry', lang: 'en-GB', gender: 'male' },
|
||||
{ id: 'bm_jack', name: 'Jack', lang: 'en-GB', gender: 'male' }
|
||||
];
|
||||
},
|
||||
|
||||
// Initialize Kokoro
|
||||
init: async function() {
|
||||
try {
|
||||
@@ -144,77 +89,56 @@
|
||||
|
||||
// Store the KokoroTTS class
|
||||
this.kokoroTTS = KokoroTTS;
|
||||
log('Kokoro library loaded successfully', 'success');
|
||||
log('Kokoro library loaded', 'success');
|
||||
this.updateProgress(0.3, 'Initializing Kokoro model...');
|
||||
|
||||
// Initialize the model
|
||||
const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
|
||||
this.instance = await this.kokoroTTS.from_pretrained(model_id, {
|
||||
dtype: "q8", // Use quantized model for better performance
|
||||
device: "wasm", // Use WebAssembly for compatibility
|
||||
device: "webgpu", // Use WebGL for better performance
|
||||
progress_callback: (progress) => {
|
||||
// Map progress from 0-1 to 30-90
|
||||
// Skip progress updates if progress is NaN/undefined (cache loading)
|
||||
if (progress === undefined || isNaN(progress)) {
|
||||
log('Model loaded from cache', 'info');
|
||||
return;
|
||||
}
|
||||
|
||||
// Map progress from 0-1 to 30-90%
|
||||
const mappedProgress = 0.3 + (progress * 0.6);
|
||||
this.updateProgress(mappedProgress, `Loading Kokoro model: ${Math.round(progress * 100)}%`);
|
||||
}
|
||||
});
|
||||
|
||||
// Fetch available voices
|
||||
log('Fetching available voices...');
|
||||
this.updateProgress(0.8, 'Fetching voices...');
|
||||
|
||||
// Use default voices directly since the list_voices method is unreliable
|
||||
log('Using predefined voice list instead of attempting to fetch from model');
|
||||
this.voices = this.getDefaultVoices();
|
||||
log(`Using ${this.voices.length} predefined voices`, 'success');
|
||||
|
||||
log('Testing Kokoro with a simple text');
|
||||
this.updateProgress(0.95, 'Testing Kokoro...');
|
||||
|
||||
// Test with a simple text
|
||||
// Use the first available voice for testing
|
||||
const testVoice = this.voices && this.voices.length > 0 ? this.voices[0].id : 'af_heart';
|
||||
await this.instance.generate('Test', { voice: testVoice });
|
||||
|
||||
log('Kokoro initialized successfully', 'success');
|
||||
this.loaded = true;
|
||||
log('Model initialized successfully', 'success');
|
||||
this.updateProgress(1.0, 'Kokoro ready');
|
||||
this.initialized = true;
|
||||
|
||||
// Notify parent window
|
||||
// Notify parent window of successful initialization
|
||||
if (window.parent !== window) {
|
||||
log('Notifying parent window of successful initialization');
|
||||
window.parent.postMessage({
|
||||
type: 'kokoro-ready',
|
||||
success: true,
|
||||
voices: this.voices
|
||||
type: 'kokoro:ready',
|
||||
success: true
|
||||
}, '*');
|
||||
}
|
||||
|
||||
// Call all callbacks
|
||||
log(`Calling ${this.callbacks.length} registered callbacks`);
|
||||
this.callbacks.forEach(callback => callback(this.instance));
|
||||
|
||||
document.getElementById('status').textContent = 'Kokoro loaded and ready!';
|
||||
} catch (error) {
|
||||
const errorMsg = error.message || 'Unknown error';
|
||||
log(`Error initializing Kokoro: ${errorMsg}`, 'error');
|
||||
console.error('Error initializing Kokoro:', error);
|
||||
this.error = error;
|
||||
|
||||
// Notify parent window
|
||||
if (window.parent !== window) {
|
||||
log('Notifying parent window of initialization failure');
|
||||
window.parent.postMessage({
|
||||
type: 'kokoro-ready',
|
||||
type: 'kokoro:ready',
|
||||
success: false,
|
||||
error: errorMsg
|
||||
}, '*');
|
||||
}
|
||||
|
||||
// Call all callbacks with error
|
||||
log(`Calling ${this.callbacks.length} registered callbacks with error`);
|
||||
this.callbacks.forEach(callback => callback(null, error));
|
||||
|
||||
document.getElementById('status').textContent = `Error loading Kokoro: ${errorMsg}`;
|
||||
}
|
||||
}
|
||||
@@ -232,9 +156,20 @@
|
||||
|
||||
const data = event.data;
|
||||
|
||||
if (data.type === 'kokoro-generate') {
|
||||
if (data.type === 'kokoro:initialize') {
|
||||
// If we're already initialized, just send the ready message
|
||||
if (window.KokoroLoader.initialized) {
|
||||
log('Already initialized, sending ready message');
|
||||
window.parent.postMessage({
|
||||
type: 'kokoro:ready',
|
||||
success: true
|
||||
}, '*');
|
||||
}
|
||||
// Otherwise init() will handle sending the ready message when done
|
||||
}
|
||||
else if (data.type === 'kokoro-generate') {
|
||||
// Generate speech in a non-blocking way
|
||||
if (!window.KokoroLoader.loaded) {
|
||||
if (!window.KokoroLoader.initialized || !window.KokoroLoader.instance) {
|
||||
log(`Cannot process generation request ${data.id}: Kokoro not loaded`, 'error');
|
||||
window.parent.postMessage({
|
||||
type: 'kokoro-generated',
|
||||
|
||||
Reference in New Issue
Block a user