/**
 * KokoroHandler for AI Interactive Fiction
 * Handles neural TTS via Kokoro.js with progress reporting and non-blocking loading
 */
import { TTSHandler } from './tts-handler.js';

/**
 * TTS provider backed by Kokoro.js.
 *
 * The model is loaded on the main thread by `checkKokoroApi()`; speech
 * generation requests are posted to a Web Worker and the returned samples are
 * wrapped in a WAV blob and played through an `Audio` element.
 *
 * At most one utterance is tracked at a time in `this.currentUtterance`.
 */
export class KokoroHandler extends TTSHandler {
  constructor() {
    super(); // Initialize the base TTSHandler

    this.kokoro = null;
    this.isReady = false;

    // The utterance being generated or played, or null when idle.
    this.currentUtterance = null;

    // Default voice options
    this.voiceOptions = {
      voice: 'bf_alice', // Default to high-quality voice
      speed: 1.0
    };

    this.progressCallback = null;
    this.scriptLoaded = false;

    // Set when the loaded module exposes `pipeline` instead of `KokoroTTS`.
    this.useLegacyFormat = false;
    this.legacySpeak = null;

    this.availableVoices = [];

    // Web Worker state
    this.worker = null;
    this.workerReady = false;
    this.pendingWorkerRequests = [];
    this.workerInitialized = false;
  }

  /**
   * Get the ID of this provider
   * @returns {string} - Provider ID
   */
  getId() {
    return 'kokoro';
  }

  /**
   * Initialize the Kokoro TTS system: load the script, load the model and
   * start the worker. Always dispatches a `kokoro-loading-complete` event on
   * `document` (with `detail.success`) before returning, so that code waiting
   * on the event is never left blocked.
   * @param {Function} progressCallback - Optional callback `(percent, message)` for progress updates
   * @returns {Promise<boolean>} - Resolves to true if initialization was successful
   */
  async initialize(progressCallback = null) {
    this.progressCallback = progressCallback;

    const report = (percent, message) => {
      if (this.progressCallback) this.progressCallback(percent, message);
    };
    const announce = (detail) => {
      document.dispatchEvent(new CustomEvent('kokoro-loading-complete', { detail }));
    };

    try {
      // First load the script if not already loaded
      report(10, "Loading Kokoro script");
      await this.loadKokoroScript();

      // Check if the kokoro library is loaded correctly
      if (!window.kokoro) {
        console.error("Kokoro TTS library not loaded correctly");
        this.isReady = false;
        announce({ success: false, error: "Library not loaded" });
        return false;
      }

      report(40, "Creating Kokoro instance");

      try {
        const apiFound = await this.checkKokoroApi();
        if (!apiFound) {
          console.error("Kokoro TTS API not found in loaded module");
          this.isReady = false;
          report(100, "Kokoro API not found");
          announce({ success: false, error: "API not found" });
          return false;
        }

        // Initialize the Web Worker for speech generation
        await this.initWorker();

        this.isReady = true;
        console.log('Kokoro TTS initialized successfully');
        report(100, "Kokoro TTS ready");
      } catch (initError) {
        console.error("Failed to initialize Kokoro instance:", initError);
        this.isReady = false;
        report(100, "Kokoro initialization failed");
      }

      // Dispatch event indicating completion, with success status
      announce({ success: this.isReady });
      return this.isReady;
    } catch (error) {
      console.error('Failed to initialize Kokoro TTS:', error);
      this.isReady = false;
      // Dispatch event even on error to unblock waiting processes
      announce({ success: false, error: error.message });
      return false;
    }
  }

  /**
   * Initialize the Web Worker for speech generation.
   *
   * The returned promise resolves on the worker's `'ready'` message and
   * rejects if the worker errors (either via `onerror` or an `'error'`
   * message) before that point.
   * @returns {Promise<void>} - Resolves when the worker is ready
   */
  initWorker() {
    return new Promise((resolve, reject) => {
      try {
        this.worker = new Worker('/js/kokoro-worker.js');

        // Handle messages from the worker
        this.worker.onmessage = (e) => {
          const { type, result, error } = e.data;
          const utterance = this.currentUtterance;

          switch (type) {
            case 'ready':
              console.log('Kokoro worker is ready');
              this.workerReady = true;
              // Process any pending requests
              this.processPendingRequests();
              resolve();
              break;

            case 'initialized':
              console.log('Kokoro worker initialized');
              this.workerInitialized = true;
              break;

            case 'generated':
              // NOTE(review): replies carry no request id, so a reply is
              // delivered to whichever utterance is current when it arrives.
              // Correlating replies needs a worker protocol change.
              if (utterance && utterance.resolveGenerate) {
                // Convert the ArrayBuffer back to Float32Array
                utterance.resolveGenerate({
                  audio: new Float32Array(result.audio),
                  sampling_rate: result.sampling_rate
                });
              }
              break;

            case 'error':
              console.error('Kokoro worker error:', error);
              if (utterance && utterance.rejectGenerate) {
                utterance.rejectGenerate(new Error(error));
              }
              // If the worker fails before reporting 'ready', settle the init
              // promise so initialize() does not hang. After a successful
              // resolve() this call is a no-op.
              reject(new Error(error));
              break;

            default:
              console.warn('Unknown message type from worker:', type);
          }
        };

        // Handle worker errors
        this.worker.onerror = (error) => {
          console.error('Kokoro worker error:', error);
          this.workerReady = false;
          if (this.currentUtterance && this.currentUtterance.rejectGenerate) {
            this.currentUtterance.rejectGenerate(error);
          }
          reject(error);
        };

        // Initialize the worker
        this.worker.postMessage({ type: 'init' });
      } catch (error) {
        console.error('Failed to initialize Kokoro worker:', error);
        reject(error);
      }
    });
  }

  /**
   * Process any pending worker requests, one per call, rescheduling itself
   * while the queue is non-empty.
   */
  processPendingRequests() {
    if (this.pendingWorkerRequests.length === 0 || !this.workerReady) {
      return;
    }

    const request = this.pendingWorkerRequests.shift();
    this.generateInWorker(request.text, request.options)
      .then(request.resolve)
      .catch(request.reject);

    // Process the next request after a small delay to keep UI responsive
    if (this.pendingWorkerRequests.length > 0) {
      setTimeout(() => this.processPendingRequests(), 10);
    }
  }

  /**
   * Generate speech in the worker.
   *
   * If the worker is not ready the request is queued. Otherwise the promise
   * callbacks are stored on `this.currentUtterance` (so the worker message
   * handler can settle them) and a `'generate'` message is posted.
   * @param {string} text - Text to convert to speech
   * @param {Object} options - Voice options (`voice`, `speed`)
   * @returns {Promise<{audio: Float32Array, sampling_rate: number}>} - Resolves with audio data
   */
  generateInWorker(text, options) {
    return new Promise((resolve, reject) => {
      if (!this.worker || !this.workerReady) {
        // Queue the request if worker isn't ready
        this.pendingWorkerRequests.push({ text, options, resolve, reject });
        return;
      }

      const utterance = this.currentUtterance;
      if (!utterance) {
        reject(new Error('No current utterance for worker generation'));
        return;
      }

      // Store the promise callbacks in the current utterance
      utterance.resolveGenerate = resolve;
      utterance.rejectGenerate = reject;

      // Send the generation request to the worker
      this.worker.postMessage({
        type: 'generate',
        data: { text, voice: options.voice, speed: options.speed }
      });
    });
  }

  /**
   * Load the Kokoro script dynamically and publish the module as
   * `window.kokoro`. Rejects if the import fails or if the module exposes
   * none of the recognised entry points.
   * @returns {Promise<void>} - Resolves when script is loaded
   */
  async loadKokoroScript() {
    // Check if already loaded
    if (this.scriptLoaded || typeof window.kokoro !== 'undefined') {
      this.scriptLoaded = true;
      return;
    }

    let module;
    try {
      // Import the module using dynamic import
      module = await import('/js/kokoro-js.js');
      this.scriptLoaded = true;
      console.log("Kokoro module structure:", Object.keys(module));

      // Store the module in window.kokoro
      window.kokoro = module;
      if (this.progressCallback) this.progressCallback(30, "Kokoro script loaded");
      console.log("Kokoro script loaded successfully via dynamic import");
    } catch (error) {
      console.error("Error importing Kokoro module:", error);
      throw error;
    }

    if (module.KokoroTTS) {
      console.log("Found KokoroTTS class in module");
      return;
    }

    if (module.TextSplitterStream && typeof module.TextSplitterStream === 'function') {
      console.log("Found TextSplitterStream in module, this may be the correct format");
      return;
    }

    if (module.pipeline || (module.default && module.default.pipeline)) {
      console.log("Found pipeline in module, using Xenova/kokoro-tts format");
      window.kokoroTTS = module.default || module;
      this.useLegacyFormat = true;
      return;
    }

    console.error("Kokoro module found but couldn't locate TTS constructor");
    throw new Error('Kokoro TTS API not found in module');
  }

  /**
   * Locate the Kokoro API in the loaded module and load the model.
   * Sets `this.kokoro` (and `this.isReady`) on success.
   * @returns {Promise<boolean>} - True if the API was found and the model loaded
   */
  async checkKokoroApi() {
    if (this.useLegacyFormat && window.kokoroTTS) {
      // Handle legacy format (Xenova/kokoro-tts)
      try {
        if (this.progressCallback) this.progressCallback(40, "Loading Kokoro model");

        this.kokoro = await window.kokoroTTS.pipeline('text-to-speech', {
          quantized: true,
          progress_callback: (progress) => {
            // The callback guard must cover both conditions; otherwise a
            // progress of 100 with no callback registered would call null.
            if (this.progressCallback && (progress % 10 === 0 || progress === 100)) {
              const message = `Loading Kokoro model: ${progress}%`;
              this.progressCallback(40 + (progress * 0.5), message);
            }
          }
        });

        // Thin wrapper around the pipeline; speak() in this file does not call it.
        this.legacySpeak = async (text, options) => {
          return await this.kokoro(text, options);
        };

        if (this.progressCallback) this.progressCallback(90, "Kokoro model loaded");
        this.isReady = !!this.kokoro;
        return this.isReady;
      } catch (error) {
        console.error("Failed to initialize legacy Kokoro pipeline:", error);
        return false;
      }
    }

    if (!window.kokoro) {
      return false;
    }

    // Handle newer kokoro-js format
    try {
      if (!window.kokoro.KokoroTTS) {
        console.error("Could not find compatible API in Kokoro module");
        return false;
      }

      console.log("Using KokoroTTS class from module");
      if (this.progressCallback) this.progressCallback(40, "Creating Kokoro TTS instance");

      const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";

      try {
        console.log("Starting Kokoro model initialization - this may take some time");
        if (this.progressCallback) this.progressCallback(50, "Loading Kokoro model files");

        // Use the from_pretrained method to load the model
        this.kokoro = await window.kokoro.KokoroTTS.from_pretrained(model_id, {
          dtype: "fp32", // Using fp32 for better quality
          device: "wasm", // Using wasm for compatibility
          // NOTE(review): upstream kokoro-js appears to document
          // `progress_callback` rather than `onProgress` - confirm which
          // option the bundled build honours. This handler assumes a
          // fractional (0..1) numeric progress value.
          onProgress: (progress) => {
            if (!this.progressCallback) return;
            const scaledProgress = 40 + (progress * 50);
            const intProgress = Math.floor(progress * 100);
            // Report only at multiples of 10% to limit UI updates
            if (intProgress % 10 === 0 || intProgress === 100) {
              const message = `Loading Kokoro model: ${intProgress}%`;
              this.progressCallback(Math.min(90, scaledProgress), message);
            }
          }
        });

        console.log("Kokoro model initialization complete");
        if (this.progressCallback) this.progressCallback(90, "Kokoro model loaded");

        // Available voices from the library
        this.availableVoices = [
          'af_heart', 'af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore',
          'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
          'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
          'am_onyx', 'am_puck', 'am_santa',
          'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
          'bf_alice', 'bf_lily', 'bm_daniel', 'bm_fable'
        ];
        console.log("Using available voices:", this.availableVoices);

        this.isReady = true;
        return true;
      } catch (modelError) {
        console.error("Failed to initialize Kokoro model:", modelError);
        return false;
      }
    } catch (error) {
      console.error("Failed to initialize Kokoro instance:", error);
      return false;
    }
  }

  /**
   * Check if Kokoro TTS is available
   * @returns {boolean} - True if Kokoro is ready to use
   */
  isAvailable() {
    return this.isReady && this.kokoro !== null;
  }

  /**
   * Check if an utterance is currently tracked (being generated or played)
   * @returns {boolean} - True if speaking
   */
  isSpeaking() {
    return this.currentUtterance !== null;
  }

  /**
   * Speak text using Kokoro TTS.
   *
   * Any utterance in progress is stopped first. The callback is invoked once
   * with a status string when playback ends or fails; if the utterance is
   * cut short by `stop()` (including by a later `speak()`), it is `stop()`
   * that invokes the callback, with no argument.
   * @param {string} text - The text to speak
   * @param {Function} callback - Called when speech completes
   * @returns {Promise<void>}
   */
  async speak(text, callback = null) {
    if (!this.isAvailable() || !text) {
      if (callback) callback("TTS not available or no text provided");
      return;
    }

    console.log(`Attempting to speak: "${text}" with voice: ${this.voiceOptions.voice}`);

    // Stop any current speech
    this.stop();

    let utterance = null;

    try {
      // Process text for better TTS quality
      const processedText = this.processText(text);

      // Create the utterance object to track state
      utterance = {
        text: processedText,
        options: { ...this.voiceOptions },
        onComplete: callback
      };
      this.currentUtterance = utterance;

      if (!(window.kokoro.KokoroTTS && this.kokoro)) {
        console.error("Kokoro TTS not properly initialized");
        this.currentUtterance = null;
        if (callback) callback("Kokoro not initialized");
        return;
      }

      // Generate audio with the selected voice using the worker
      console.log(`Generating audio for text: "${processedText}" with voice: ${utterance.options.voice}`);

      try {
        const audioData = await this.generateInWorker(processedText, {
          voice: utterance.options.voice,
          speed: utterance.options.speed || 1.0
        });

        // stop() or a newer speak() ran while we were waiting: drop the result.
        if (this.currentUtterance !== utterance) {
          return;
        }

        if (!audioData) {
          throw new Error("Worker generated no audio data");
        }

        // Create WAV from Float32Array for browser playback
        const wavBlob = this.float32ArrayToWav(audioData.audio, audioData.sampling_rate);
        const url = URL.createObjectURL(wavBlob);
        const audio = new Audio(url);

        // Ends this utterance exactly once. State is cleared BEFORE the
        // callback runs so the callback may safely start the next utterance,
        // and nothing happens if this utterance is no longer the current one.
        const finish = (message) => {
          URL.revokeObjectURL(url);
          if (this.currentUtterance !== utterance) {
            return;
          }
          this.currentUtterance = null;
          if (utterance.onComplete) utterance.onComplete(message);
        };

        // Set up event handlers for the audio element
        audio.oncanplay = async () => {
          // `canplay` can fire more than once; only start playback the first time.
          audio.oncanplay = null;
          try {
            await audio.play();
          } catch (playError) {
            console.error("Error playing audio:", playError);
            finish("Play error: " + playError.message);
          }
        };

        audio.onended = () => {
          finish("Playback completed successfully");
        };

        audio.onerror = (error) => {
          console.error("Error playing Kokoro audio:", error);
          finish("Audio error: " + (error.message || "unknown error"));
        };

        // Store the audio element (and its URL) for stopping later
        utterance.audio = audio;
        utterance.url = url;
      } catch (genError) {
        // Cancelled by stop(): the callback has already been notified there.
        if (this.currentUtterance !== utterance) {
          return;
        }
        console.error("Error generating audio:", genError);
        this.currentUtterance = null;
        if (callback) callback("Generate error: " + genError.message);
      }
    } catch (error) {
      console.error('Error speaking with Kokoro TTS:', error);
      if (this.currentUtterance === utterance) {
        this.currentUtterance = null;
      }
      if (callback) callback("Error: " + error.message);
    }
  }

  /**
   * Stop any ongoing speech.
   *
   * Cancels the current utterance whether it is still being generated or
   * already playing, releases its object URL, and invokes its completion
   * callback with no argument.
   */
  stop() {
    const utterance = this.currentUtterance;
    if (!utterance) {
      return;
    }

    // Clear state before notifying so the callback can start new speech.
    this.currentUtterance = null;

    if (utterance.audio) {
      const audio = utterance.audio;
      // Detach handlers first: seeking back to 0 can fire `canplay` again,
      // which would otherwise restart playback.
      audio.oncanplay = null;
      audio.onended = null;
      audio.onerror = null;
      try {
        audio.pause();
        audio.currentTime = 0;
      } catch (error) {
        console.error('Error stopping Kokoro TTS:', error);
      }
      if (utterance.url) {
        URL.revokeObjectURL(utterance.url);
      }
    } else if (utterance.rejectGenerate) {
      // Still waiting on the worker: settle the pending promise so the
      // speak() call that owns it can finish.
      utterance.rejectGenerate(new Error('Speech generation cancelled'));
    }

    // Call the completion callback if it exists
    if (utterance.onComplete) {
      utterance.onComplete();
    }
  }

  /**
   * Set the speech rate/speed. Values are clamped to 0.5-2.0; values that
   * are not finite numbers are ignored.
   * @param {number} speed - Speech rate (0.5-2.0)
   */
  setSpeed(speed) {
    const numericSpeed = Number(speed);
    if (!Number.isFinite(numericSpeed)) {
      return;
    }
    // Ensure speed is within valid range
    this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, numericSpeed));
  }

  /**
   * Set the voice to use. Ignored unless `voice` is a non-empty string.
   * @param {string} voice - Voice identifier
   */
  setVoice(voice) {
    if (voice && typeof voice === 'string') {
      this.voiceOptions.voice = voice;
    }
  }

  /**
   * Get available voices
   * @returns {Promise<Array<{id: string, name: string}>>} - Array of available voices
   *   (empty when the handler is not available)
   */
  async getVoices() {
    if (!this.isAvailable()) return [];

    try {
      // Return the list of voices recorded when the model was loaded
      if (this.availableVoices && this.availableVoices.length > 0) {
        return this.availableVoices.map((id) => ({
          id,
          name: this.formatVoiceId(id)
        }));
      }

      // Fallback to hardcoded list if needed
      return [
        { id: 'af_heart', name: 'Heart (Female)' },
        { id: 'af_bella', name: 'Bella (Female)' },
        { id: 'am_michael', name: 'Michael (Male)' },
        { id: 'bf_emma', name: 'Emma (British Female)' },
        { id: 'bm_george', name: 'George (British Male)' }
      ];
    } catch (error) {
      console.error('Error getting Kokoro voices:', error);
      return [];
    }
  }

  /**
   * Format a voice ID into a readable name
   * @param {string} voiceId - The voice ID to format
   * @returns {string} - Formatted voice name
   */
  formatVoiceId(voiceId) {
    if (!voiceId) return "Unknown Voice";

    // Convert id like "en_us_female" to "English (US) - Female"
    const parts = voiceId.split('_');
    if (parts.length >= 3) {
      // Try to map language codes
      const languageMap = {
        'en': 'English',
        'fr': 'French',
        'de': 'German',
        'es': 'Spanish',
        'it': 'Italian',
        'pt': 'Portuguese',
        'pl': 'Polish',
        'ja': 'Japanese',
        'ko': 'Korean',
        'zh': 'Chinese',
        'ru': 'Russian'
      };

      const langName = languageMap[parts[0]] || parts[0].toUpperCase();
      const region = parts[1].toUpperCase();
      // Remaining parts are the gender or voice type
      const voiceType = parts.slice(2).join(' ').replace(/\b\w/g, l => l.toUpperCase());

      return `${langName} (${region}) - ${voiceType}`;
    }

    // For other naming formats (e.g. "af_heart"), capitalize words
    return voiceId.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
  }

  /**
   * Process text for better TTS quality
   * @param {string} text - Raw text
   * @returns {string} - Processed text
   */
  processText(text) {
    if (!text) return '';

    return text
      .replace(/\s+/g, ' ') // Normalize whitespace
      .replace(/--/g, '—') // Em dash
      .replace(/\.\.\./g, '…') // Ellipsis
      .trim();
  }

  /**
   * Convert a Float32Array to a WAV format Blob (mono, 32-bit IEEE float)
   * @param {Float32Array} float32Array - The audio data as Float32Array
   * @param {number} sampleRate - The sample rate of the audio
   * @returns {Blob} - A WAV format Blob
   */
  float32ArrayToWav(float32Array, sampleRate = 24000) {
    const headerSize = 44; // WAV header is 44 bytes
    const bytesPerSample = 4;
    const dataSize = float32Array.length * bytesPerSample;

    const buffer = new ArrayBuffer(headerSize + dataSize);
    const view = new DataView(buffer);

    // "RIFF" chunk descriptor
    this.writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true); // File size
    this.writeString(view, 8, 'WAVE');

    // "fmt " sub-chunk
    this.writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
    view.setUint16(20, 3, true); // AudioFormat (3 for IEEE float)
    view.setUint16(22, 1, true); // NumChannels (1 for mono)
    view.setUint32(24, sampleRate, true); // SampleRate
    view.setUint32(28, sampleRate * bytesPerSample, true); // ByteRate (SampleRate * NumChannels * BitsPerSample/8)
    view.setUint16(32, bytesPerSample, true); // BlockAlign (NumChannels * BitsPerSample/8)
    view.setUint16(34, 32, true); // BitsPerSample (32 bits for float)

    // "data" sub-chunk
    this.writeString(view, 36, 'data');
    view.setUint32(40, dataSize, true); // Subchunk2Size

    // Write the samples as little-endian floats
    for (let i = 0; i < float32Array.length; i++) {
      view.setFloat32(headerSize + i * bytesPerSample, float32Array[i], true);
    }

    return new Blob([buffer], { type: 'audio/wav' });
  }

  /**
   * Write a string to a DataView, one byte per UTF-16 code unit
   * @param {DataView} view - The DataView to write to
   * @param {number} offset - The offset to write at
   * @param {string} string - The string to write
   */
  writeString(view, offset, string) {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  }

  /**
   * Clean up resources when this handler is no longer needed
   */
  dispose() {
    this.stop();

    // Terminate the worker if it exists
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    this.workerReady = false;
    this.workerInitialized = false;

    // Requests still queued for the worker can never complete now.
    const abandoned = this.pendingWorkerRequests.splice(0);
    for (const request of abandoned) {
      request.reject(new Error('Kokoro handler disposed'));
    }

    // Clean up other resources
    this.kokoro = null;
    this.isReady = false;
  }
}