/**
 * KokoroTTSModule for AI Interactive Fiction
 * Implementation using the Kokoro library
 */

import { TTSHandlerModule } from './tts-handler-module.js';
/**
 * TTS engine module backed by a Kokoro model that runs inside a hidden iframe
 * (`/kokoro-loader.html`). The module talks to the iframe with `postMessage`:
 *
 *   parent -> iframe: `kokoro:initialize`, `kokoro:set-voice`, `kokoro:generate-speech`
 *   iframe -> parent: `kokoro:progress`, `kokoro:ready`, `kokoro:error`,
 *                     `kokoro:speech-generated`, `kokoro:voices`
 *
 * Generated audio is played through a regular `Audio` element in the parent page.
 *
 * NOTE(review): `this.options`, `this.isReady`, `this.isSpeaking`, `this.state`,
 * `this.currentVoice`, `getModule`, `reportProgress` and `bindMethods` are used
 * here but not defined in this file - presumably provided by TTSHandlerModule.
 */
export class KokoroTTSModule extends TTSHandlerModule {
  /**
   * Create the module in its idle state. Nothing is loaded until
   * {@link KokoroTTSModule#initialize} is called.
   */
  constructor() {
    super('kokoro', 'Kokoro TTS');

    // State
    this.iframe = null; // hidden iframe hosting the Kokoro loader page
    this.currentAudio = null; // Audio element that is playing or paused
    this.currentAudioUrl = null; // object URL backing currentAudio (revoked on release)
    this.pendingGenerations = new Map(); // request id -> { resolve, reject }
    this.generationCounter = 0; // source of unique generation request ids
    this.voices = [];
    this.lastProgressTime = null;
    this.lastProgressValue = null;
    this.modelLoaded = false;

    // Bind additional methods beyond those in TTSHandlerModule
    this.bindMethods([
      'handleIframeMessage',
      'setupVoiceFromPreferences',
      'generateSpeech',
      'speakPreloaded',
      'preprocessText',
      'pause',
      'resume',
      'getDefaultVoices'
    ]);
  }

  /**
   * Initialize the Kokoro TTS module: verify the Kokoro script is reachable,
   * load the hidden iframe, wait for the model to report ready, then select a
   * voice from the stored preferences.
   *
   * On any failure the iframe and message listener created by this attempt are
   * removed again, so a later retry starts from a clean slate.
   *
   * @returns {Promise<boolean>} - Resolves with success status (never rejects)
   */
  async initialize() {
    try {
      console.log('Kokoro TTS: Initializing');
      this.state = 'INITIALIZING';

      // Get dependencies
      this.reportProgress(10, 'Loading dependencies');

      // The persistence manager is required for preferences
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('Kokoro TTS: Required dependency persistence-manager not found');
        return false;
      }

      // Check that the kokoro-js.js resource exists before creating the iframe
      try {
        this.reportProgress(20, 'Checking for Kokoro TTS resources');
        const response = await fetch('/js/kokoro-js.js', { method: 'HEAD' });
        if (!response.ok) {
          console.error(`Kokoro TTS: Required resource kokoro-js.js not found (${response.status})`);
          throw new Error('Kokoro TTS resource not available');
        }
        console.log('Kokoro TTS: Resources available');
      } catch (resourceError) {
        console.error('Kokoro TTS: Error checking resources', resourceError);
        return false;
      }

      // Drop anything a previous attempt left behind so iframes never pile up
      this.teardownIframe();

      // Create iframe for Kokoro TTS
      this.reportProgress(30, 'Creating Kokoro TTS iframe');
      console.log('Kokoro TTS: Creating iframe for Kokoro loader');
      const iframe = document.createElement('iframe');
      iframe.style.display = 'none';
      this.iframe = iframe;

      // Wait for iframe to load
      try {
        await new Promise((resolve, reject) => {
          // Handlers are attached before the iframe starts loading so that no
          // event can be missed.
          iframe.onload = () => {
            console.log('Kokoro TTS: Iframe loaded successfully');
            resolve();
          };

          iframe.onerror = (error) => {
            console.error('Kokoro TTS: Iframe failed to load:', error);
            reject(new Error('Kokoro TTS: Iframe failed to load'));
          };

          iframe.onabort = () => {
            console.error('Kokoro TTS: Iframe load aborted');
            reject(new Error('Kokoro TTS: Iframe load aborted'));
          };

          iframe.src = '/kokoro-loader.html';
          document.body.appendChild(iframe);
        });
      } catch (iframeError) {
        console.error('Kokoro TTS: Error loading iframe:', iframeError);
        this.teardownIframe();
        return false;
      }

      // Add message event listener for progress updates from iframe
      window.addEventListener('message', this.handleIframeMessage);

      // Wait for model to initialize
      let reportedVoices;
      try {
        this.reportProgress(50, 'Loading Kokoro model');
        console.log('Kokoro TTS: Waiting for model to initialize');
        reportedVoices = await this.waitForModelReady(iframe);
      } catch (modelError) {
        console.error('Kokoro TTS: Error initializing model:', modelError);
        this.teardownIframe();
        return false;
      }

      // Prefer the voices the iframe reported; the built-in list is only a
      // fallback. Progress values stay above the 90 reported on model-ready so
      // the progress indicator never moves backwards.
      this.reportProgress(92, 'Loading Kokoro voices');
      this.voices = Array.isArray(reportedVoices) && reportedVoices.length > 0
        ? reportedVoices
        : this.getDefaultVoices();
      console.log('Kokoro TTS: Loaded voices:', this.voices);

      // Set voice based on preferences
      this.reportProgress(95, 'Setting up voice preferences');
      await this.setupVoiceFromPreferences(persistenceManager);
      console.log('Kokoro TTS: Voice preferences set up');

      this.isReady = true;
      this.reportProgress(100, 'Kokoro TTS initialized');
      console.log('Kokoro TTS: Initialization complete');

      return true;
    } catch (error) {
      console.error('Kokoro TTS: Initialization error:', error);
      this.isReady = false;
      this.teardownIframe();
      return false;
    }
  }

  /**
   * Ask the iframe to initialize the model and wait for its `kokoro:ready`
   * reply. Only messages whose source is the given iframe are accepted.
   *
   * @param {HTMLIFrameElement} iframe - Loaded Kokoro loader iframe
   * @returns {Promise<Array|undefined>} - Resolves with the `voices` value from
   *   the ready message (may be undefined); rejects when the iframe reports
   *   `success: false` or the request cannot be posted
   */
  waitForModelReady(iframe) {
    return new Promise((resolve, reject) => {
      // One-time handler for the kokoro:ready message
      const readyHandler = (event) => {
        if (event.source !== iframe.contentWindow) {
          return;
        }
        if (!event.data || event.data.type !== 'kokoro:ready') {
          return;
        }

        window.removeEventListener('message', readyHandler);

        // Validate the success status from the event
        if (event.data.success === false) {
          console.error('Kokoro TTS: Model initialization failed:', event.data.error || 'Unknown error');
          reject(new Error('Kokoro TTS: ' + (event.data.error || 'Model initialization failed')));
          return;
        }

        console.log('Kokoro TTS: Model initialized successfully');
        this.modelLoaded = true;
        resolve(event.data.voices);
      };

      window.addEventListener('message', readyHandler);

      // Send initialization message to iframe
      this.reportProgress(60, 'Initializing Kokoro model');
      console.log('Kokoro TTS: Sending initialization message to iframe');
      try {
        iframe.contentWindow.postMessage({ type: 'kokoro:initialize' }, '*');
      } catch (postError) {
        window.removeEventListener('message', readyHandler);
        reject(postError);
      }
    });
  }

  /**
   * Remove the iframe and its message listener and fail every generation
   * request that is still waiting for an answer from it. Safe to call when
   * nothing has been set up.
   */
  teardownIframe() {
    if (typeof window !== 'undefined') {
      window.removeEventListener('message', this.handleIframeMessage);
    }

    const iframe = this.iframe;
    this.iframe = null;
    this.modelLoaded = false;
    if (iframe && iframe.parentNode) {
      iframe.parentNode.removeChild(iframe);
    }

    // The iframe that would have answered these requests is gone
    for (const { reject } of this.pendingGenerations.values()) {
      reject(new Error('Kokoro TTS: iframe was removed before speech was generated'));
    }
    this.pendingGenerations.clear();
  }

  /**
   * Handle messages from the iframe. Messages from any other source are
   * ignored.
   * @param {MessageEvent} event - Message event
   */
  handleIframeMessage = (event) => {
    // Only process messages from our iframe
    if (!this.iframe || event.source !== this.iframe.contentWindow) {
      return;
    }

    const message = event.data;
    if (!message || !message.type) {
      return;
    }

    switch (message.type) {
      case 'kokoro:progress': {
        const progress = message.progress;
        // Accept 0 as a valid value, but skip anything that is not a finite
        // number (toFixed below would throw on it).
        if (typeof progress !== 'number' || !Number.isFinite(progress)) {
          break;
        }

        // Track the last time we received a progress update
        this.lastProgressTime = Date.now();
        this.lastProgressValue = progress;
        this.modelLoadingProgress = progress;

        // Map the model's progress onto the 60-90 slice of overall progress
        // (assumes the iframe reports 0-100 - TODO confirm)
        this.reportProgress(60 + Math.floor(progress * 0.3), `Loading Kokoro model: ${progress.toFixed(0)}%`);
        break;
      }

      case 'kokoro:ready':
        // A ready message carrying success === false means loading failed;
        // do not mark the model as loaded in that case.
        if (message.success === false) {
          break;
        }
        this.modelLoaded = true;
        this.reportProgress(90, 'Kokoro model loaded');
        console.log('Kokoro TTS: Model ready event received');
        break;

      case 'kokoro:error':
        console.error('Kokoro TTS: Error from iframe:', message.error);
        this.state = 'ERROR';
        break;

      case 'kokoro:speech-generated':
        // Settle the promise created by generateSpeech() for this request id
        if (message.id !== undefined && this.pendingGenerations.has(message.id)) {
          const resolver = this.pendingGenerations.get(message.id);
          this.pendingGenerations.delete(message.id);

          if (message.error) {
            resolver.reject(new Error(message.error));
          } else {
            resolver.resolve({
              success: true,
              audioData: message.audioData,
              duration: message.duration || 0
            });
          }
        }
        break;

      case 'kokoro:voices':
        // Update available voices and notify listeners
        if (Array.isArray(message.voices)) {
          this.voices = message.voices;
          document.dispatchEvent(new CustomEvent('tts:voices-updated', {
            detail: { engine: 'kokoro', voices: this.voices }
          }));
        }
        break;
    }
  }

  /**
   * Select the active voice. Tried in order: the voice stored under the
   * `tts` / `kokoro_voice` preference, a voice whose `lang` equals the current
   * locale, a voice whose `lang` starts with the locale's language code, and
   * finally the first available voice.
   *
   * @param {Object} persistenceManager - Preference store (`getPreference`)
   * @returns {Promise<boolean>} - True when a voice was selected
   */
  async setupVoiceFromPreferences(persistenceManager) {
    if (!persistenceManager) {
      return false;
    }

    // Get current locale
    const localization = this.getModule('localization');
    const locale = localization ? localization.getLocale() : null;

    // Get preferred voice from preferences
    const preferredVoiceId = persistenceManager.getPreference('tts', 'kokoro_voice', '');

    let selectedVoice = null;
    if (preferredVoiceId) {
      selectedVoice = this.voices.find((voice) => voice.id === preferredVoiceId);
    }

    if (!selectedVoice) {
      // Falls back to US English when no locale is available
      const normalizedLocale = locale ? locale.toLowerCase().replace('_', '-') : 'en-us';
      const languageCode = normalizedLocale.split('-')[0];

      const exactMatch = this.voices.find(
        (voice) => voice.lang && voice.lang.toLowerCase() === normalizedLocale
      );
      const languageMatch = exactMatch || this.voices.find(
        (voice) => voice.lang && voice.lang.toLowerCase().startsWith(languageCode)
      );

      selectedVoice = languageMatch || (this.voices.length > 0 ? this.voices[0] : null);
    }

    if (selectedVoice) {
      this.setVoice(selectedVoice);
      return true;
    }

    return false;
  }

  /**
   * Set voice for TTS. Stores the voice, persists its id under the
   * `tts` / `kokoro_voice` preference and forwards it to the iframe.
   * @param {Object} voice - Voice to set (must have an `id`)
   * @returns {boolean} - Success status
   */
  setVoice(voice) {
    if (!voice || !voice.id) {
      return false;
    }

    this.currentVoice = voice;

    // Save to preferences
    const persistenceManager = this.getModule('persistence-manager');
    if (persistenceManager) {
      persistenceManager.updatePreference('tts', 'kokoro_voice', voice.id);
    }

    // Send message to iframe
    if (this.iframe && this.iframe.contentWindow) {
      this.iframe.contentWindow.postMessage({
        type: 'kokoro:set-voice',
        voiceId: voice.id
      }, '*');
    }

    return true;
  }

  /**
   * Set options for TTS. Only `rate` and `volume` are read; they are applied
   * to audio started after this call.
   * @param {Object} options - Options to set
   * @returns {boolean} - Success status
   */
  setOptions(options) {
    if (!options) {
      return false;
    }

    if (options.rate !== undefined) {
      this.options.rate = options.rate;
    }

    if (options.volume !== undefined) {
      this.options.volume = options.volume;
    }

    return true;
  }

  /**
   * Get available voices
   * @returns {Promise<Array>} - Loaded voices, or the built-in defaults when
   *   none are loaded yet
   */
  async getVoices() {
    if (!this.voices || this.voices.length === 0) {
      return this.getDefaultVoices();
    }

    return this.voices;
  }

  /**
   * Preprocess text for TTS: strip HTML tags, decode a few common HTML
   * entities, spell out `&` as "and" and collapse whitespace.
   * @param {string} text - Text to preprocess (null/undefined is treated as '')
   * @returns {string} - Preprocessed text
   */
  preprocessText(text) {
    const entities = {
      nbsp: ' ',
      amp: '&',
      lt: '<',
      gt: '>',
      quot: '"',
      apos: "'",
      '#39': "'"
    };

    return String(text ?? '')
      // Remove HTML tags
      .replace(/<[^>]*>/g, ' ')
      // Decode entities before '&' is spelled out, otherwise "&amp;" would be
      // read aloud as "and amp;"
      .replace(/&(nbsp|amp|lt|gt|quot|apos|#39);/g, (match, name) => entities[name])
      // Replace special characters
      .replace(/&/g, ' and ')
      // Normalize whitespace
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Preload speech for later playback
   * @param {string} text - Text to preload
   * @returns {Promise<Object>} - Resolves with preloaded audio data, or with
   *   `{ success: false, reason }` when the module is not ready or generation
   *   fails (never rejects)
   */
  async preloadSpeech(text) {
    if (!this.isReady) {
      return { success: false, reason: 'not_ready' };
    }

    let result;
    try {
      result = await this.generateSpeech(text);
    } catch (error) {
      // generateSpeech rejects when the iframe reports an error; surface that
      // as a failed result, which is what this method documents.
      console.error('Kokoro TTS: Speech generation failed:', error);
      return { success: false, reason: 'generation_failed', error };
    }

    if (!result.success) {
      return { success: false, reason: 'generation_failed' };
    }

    return {
      success: true,
      audioData: result.audioData,
      text,
      duration: result.duration || 0
    };
  }

  /**
   * Play generated audio, replacing whatever is currently playing. Shared by
   * speak() and speakPreloaded().
   *
   * The callback is invoked at most once per playback outcome: with
   * `{ success: true }` when playback ends, or with
   * `{ success: false, reason: 'playback_error', error }` when the element
   * errors or `play()` rejects. The object URL is revoked in every case.
   *
   * @param {*} audioData - Audio payload accepted by the Blob constructor
   * @param {Function|null} callback - Completion callback
   */
  playKokoroAudio(audioData, callback = null) {
    // Stop any ongoing speech (also releases its object URL)
    this.stop();

    const audioBlob = new Blob([audioData], { type: 'audio/mp3' });
    const audioUrl = URL.createObjectURL(audioBlob);

    const audio = new Audio(audioUrl);
    audio.volume = this.options.volume;
    audio.playbackRate = this.options.rate;

    let finished = false;
    const finish = (result) => {
      if (finished) {
        return;
      }
      finished = true;

      audio.onended = null;
      audio.onerror = null;

      // Only reset shared state if a newer playback has not taken over
      if (this.currentAudio === audio) {
        this.currentAudio = null;
        this.currentAudioUrl = null;
        this.isSpeaking = false;
      }
      URL.revokeObjectURL(audioUrl);

      if (callback) {
        callback(result);
      }
    };

    audio.onended = () => finish({ success: true });
    audio.onerror = (error) => finish({ success: false, reason: 'playback_error', error });

    // Start playback
    this.currentAudio = audio;
    this.currentAudioUrl = audioUrl;
    this.isSpeaking = true;
    audio.play().catch((error) => finish({ success: false, reason: 'playback_error', error }));
  }

  /**
   * Speak text using preloaded audio
   * @param {Object} preloadData - Preloaded audio data (needs `audioData`)
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - Success status (true when playback was started)
   */
  speakPreloaded(preloadData, callback = null) {
    if (!this.isReady || !preloadData || !preloadData.audioData) {
      if (callback) {
        callback({ success: false, reason: 'invalid_data' });
      }
      return false;
    }

    this.playKokoroAudio(preloadData.audioData, callback);
    return true;
  }

  /**
   * Speak text. Generation and playback happen asynchronously; the outcome is
   * reported through the callback.
   * @param {string} text - Text to speak
   * @param {Function} callback - Callback for when speech completes
   * @returns {boolean} - True when the request was accepted, false when the
   *   module is not ready
   */
  speak(text, callback = null) {
    if (!this.isReady) {
      if (callback) {
        callback({ success: false, reason: 'not_ready' });
      }
      return false;
    }

    // generateSpeech() preprocesses the text itself, so the raw text is passed
    this.generateSpeech(text).then((result) => {
      if (result.success && result.audioData) {
        this.playKokoroAudio(result.audioData, callback);
      } else if (callback) {
        callback({ success: false, reason: 'generation_failed' });
      }
    }).catch((error) => {
      if (callback) {
        callback({ success: false, reason: 'generation_error', error });
      }
    });

    return true;
  }

  /**
   * Generate speech using the iframe. The request is tracked in
   * `pendingGenerations` until the matching `kokoro:speech-generated` message
   * arrives.
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - Resolves with
   *   `{ success: true, audioData, duration }`, or with
   *   `{ success: false, reason: 'not_ready' }`; rejects when the iframe
   *   reports an error or the request cannot be posted
   */
  async generateSpeech(text) {
    if (!this.isReady || !this.iframe || !this.iframe.contentWindow) {
      return { success: false, reason: 'not_ready' };
    }

    const processedText = this.preprocessText(text);
    const target = this.iframe.contentWindow;

    return new Promise((resolve, reject) => {
      // Generate unique ID for this request
      const id = this.generationCounter++;

      // Store resolver functions
      this.pendingGenerations.set(id, { resolve, reject });

      // Send request to iframe
      try {
        target.postMessage({
          type: 'kokoro:generate-speech',
          text: processedText,
          id,
          voiceId: this.currentVoice ? this.currentVoice.id : null
        }, '*');
      } catch (error) {
        // No answer will ever arrive for this id; do not leave it pending
        this.pendingGenerations.delete(id);
        reject(error);
      }
    });
  }

  /**
   * Stop current speech and release the audio element and its object URL.
   * @returns {boolean} - Success status (false only when stopping threw)
   */
  stop() {
    const audio = this.currentAudio;
    if (!audio) {
      return true;
    }

    // Release shared state first so it is reset even if pausing throws
    this.currentAudio = null;
    this.isSpeaking = false;
    audio.onended = null;
    audio.onerror = null;

    let stopped = true;
    try {
      audio.pause();
      audio.currentTime = 0;
    } catch (error) {
      console.error('Kokoro TTS: Error stopping speech:', error);
      stopped = false;
    }

    // 'ended' never fires for interrupted audio, so the URL is revoked here
    if (this.currentAudioUrl) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }

    return stopped;
  }

  /**
   * Pause current speech
   * @returns {boolean} - Success status (true when nothing is playing)
   */
  pause() {
    if (!this.currentAudio) {
      return true;
    }

    try {
      this.currentAudio.pause();
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Error pausing speech:', error);
      return false;
    }
  }

  /**
   * Resume current speech
   * @returns {boolean} - Success status (false when there is nothing to resume)
   */
  resume() {
    if (!this.currentAudio) {
      return false;
    }

    try {
      const playback = this.currentAudio.play();
      // play() reports failures through its promise, not by throwing
      if (playback && typeof playback.catch === 'function') {
        playback.catch((error) => {
          console.error('Kokoro TTS: Error resuming speech:', error);
        });
      }
      return true;
    } catch (error) {
      console.error('Kokoro TTS: Error resuming speech:', error);
      return false;
    }
  }

  /**
   * Get the built-in voice list, used when the iframe has not reported any
   * voices. The list is fixed; it does not depend on the locale.
   * @returns {Array} Default voices (`{ id, name, lang, gender }`)
   */
  getDefaultVoices() {
    return [
      // American Female voices
      { id: 'af_heart', name: 'Heart', lang: 'en-US', gender: 'female' },
      { id: 'af_daisy', name: 'Daisy', lang: 'en-US', gender: 'female' },
      { id: 'af_soft', name: 'Soft', lang: 'en-US', gender: 'female' },
      { id: 'af_glados', name: 'GLaDOS', lang: 'en-US', gender: 'female' },
      { id: 'af_southern_belle', name: 'Southern Belle', lang: 'en-US', gender: 'female' },
      { id: 'af_dramatic', name: 'Dramatic', lang: 'en-US', gender: 'female' },
      { id: 'af_valley_girl', name: 'Valley Girl', lang: 'en-US', gender: 'female' },
      { id: 'af_british', name: 'British', lang: 'en-US', gender: 'female' },
      { id: 'af_russian', name: 'Russian', lang: 'en-US', gender: 'female' },
      { id: 'af_german', name: 'German', lang: 'en-US', gender: 'female' },
      { id: 'af_cheeky_cute', name: 'Cheeky Cute', lang: 'en-US', gender: 'female' },

      // American Male voices
      { id: 'am_bruce', name: 'Bruce', lang: 'en-US', gender: 'male' },
      { id: 'am_announcer', name: 'Announcer', lang: 'en-US', gender: 'male' },
      { id: 'am_radio_host', name: 'Radio Host', lang: 'en-US', gender: 'male' },

      // British Female voices
      { id: 'bf_charlotte', name: 'Charlotte', lang: 'en-GB', gender: 'female' },
      { id: 'bf_elizabeth', name: 'Elizabeth', lang: 'en-GB', gender: 'female' },
      { id: 'bf_lily', name: 'Lily', lang: 'en-GB', gender: 'female' },
      { id: 'bf_olivia', name: 'Olivia', lang: 'en-GB', gender: 'female' },
      { id: 'bf_victoria', name: 'Victoria', lang: 'en-GB', gender: 'female' },

      // British Male voices
      { id: 'bm_william', name: 'William', lang: 'en-GB', gender: 'male' },
      { id: 'bm_arthur', name: 'Arthur', lang: 'en-GB', gender: 'male' },
      { id: 'bm_george', name: 'George', lang: 'en-GB', gender: 'male' },
      { id: 'bm_harry', name: 'Harry', lang: 'en-GB', gender: 'male' },
      { id: 'bm_jack', name: 'Jack', lang: 'en-GB', gender: 'male' }
    ];
  }
}

// Self-registration: runs once when this file is loaded.
// The registry is looked up on `window` on purpose - it MUST NOT be imported directly.
if (!window.moduleRegistry) {
  console.error('Module registry not available when attempting to register Kokoro TTS Module');
} else {
  try {
    // Build the instance and hand it straight to the registry
    window.moduleRegistry.register(new KokoroTTSModule());
    console.log('Kokoro TTS Module registered successfully');
  } catch (registrationError) {
    console.error('Failed to register Kokoro TTS Module:', registrationError);
  }
}