Files
ai.interactive.fiction/public/js/kokoro-handler.js
T

708 lines
28 KiB
JavaScript

/**
* KokoroHandler for AI Interactive Fiction
* Handles neural TTS via Kokoro.js with progress reporting and non-blocking loading
*/
import { TTSHandler } from './tts-handler.js';
export class KokoroHandler extends TTSHandler {
  /**
   * Create an idle handler. Nothing is loaded until initialize() is called.
   */
  constructor() {
    super(); // Initialize the base TTSHandler
    // Model/pipeline instance created by checkKokoroApi(); null until loaded.
    this.kokoro = null;
    this.isReady = false;
    // State of the utterance being generated or played:
    // { text, options, onComplete, audio?, url?, finished? }
    this.currentUtterance = null;
    this.voiceOptions = {
      voice: 'bf_alice', // Default to high-quality voice
      speed: 1.0
    };
    this.progressCallback = null;
    this.scriptLoaded = false;
    this.useLegacyFormat = false;
    this.legacySpeak = null;
    this.availableVoices = [];
    this.worker = null;
    this.workerReady = false;
    // Generation requests received while the worker was not ready yet.
    this.pendingWorkerRequests = [];
    this.workerInitialized = false;
    // { resolve, reject } pairs for 'generate' messages already posted to the
    // worker, oldest first. Keeping them here (instead of on currentUtterance)
    // means a newer speak() cannot steal or orphan an older request's result.
    this.inFlightGenerations = [];
  }

  /**
   * Get the ID of this provider
   * @returns {string} - Provider ID
   */
  getId() {
    return 'kokoro';
  }

  /**
   * Initialize the Kokoro TTS system: load the library, create the model and
   * start the generation worker. Always dispatches a 'kokoro-loading-complete'
   * event on `document` (detail: { success, error? }) so waiters are unblocked.
   * @param {Function} progressCallback - Optional callback(percent, message) for progress updates
   * @returns {Promise<boolean>} - Resolves to true if initialization was successful
   */
  async initialize(progressCallback = null) {
    this.progressCallback = progressCallback;
    try {
      this._reportKokoroProgress(10, "Loading Kokoro script");
      await this.loadKokoroScript();

      if (!window.kokoro) {
        console.error("Kokoro TTS library not loaded correctly");
        this.isReady = false;
        this._dispatchKokoroLoadingComplete({ success: false, error: "Library not loaded" });
        return false;
      }

      this._reportKokoroProgress(40, "Creating Kokoro instance");

      try {
        const apiFound = await this.checkKokoroApi();
        if (!apiFound) {
          console.error("Kokoro TTS API not found in loaded module");
          this.isReady = false;
          this._reportKokoroProgress(100, "Kokoro API not found");
          this._dispatchKokoroLoadingComplete({ success: false, error: "API not found" });
          return false;
        }

        // Speech generation runs in a Web Worker; only report ready once it is up.
        await this.initWorker();
        this.isReady = true;
        console.log('Kokoro TTS initialized successfully');
        this._reportKokoroProgress(100, "Kokoro TTS ready");
      } catch (initError) {
        console.error("Failed to initialize Kokoro instance:", initError);
        this.isReady = false;
        this._reportKokoroProgress(100, "Kokoro initialization failed");
      }

      this._dispatchKokoroLoadingComplete({ success: this.isReady });
      return this.isReady;
    } catch (error) {
      console.error('Failed to initialize Kokoro TTS:', error);
      this.isReady = false;
      // Dispatch event even on error to unblock waiting processes
      this._dispatchKokoroLoadingComplete({ success: false, error: error.message });
      return false;
    }
  }

  /**
   * Forward a progress update to the callback given to initialize(), if any.
   * @param {number} percent - Progress value passed through to the callback
   * @param {string} message - Human-readable status
   */
  _reportKokoroProgress(percent, message) {
    if (this.progressCallback) {
      this.progressCallback(percent, message);
    }
  }

  /**
   * Dispatch the 'kokoro-loading-complete' event on `document`.
   * @param {Object} detail - Event detail ({ success, error? })
   */
  _dispatchKokoroLoadingComplete(detail) {
    document.dispatchEvent(new CustomEvent('kokoro-loading-complete', { detail }));
  }

  /**
   * Reject and forget every generation request already posted to the worker.
   * @param {*} reason - Rejection reason handed to each waiting promise
   */
  _failInFlightGenerations(reason) {
    const abandoned = this.inFlightGenerations.splice(0);
    for (const request of abandoned) {
      request.reject(reason);
    }
  }

  /**
   * Initialize the Web Worker for speech generation.
   * Rejects if the worker cannot be created, raises an error event, or posts
   * an 'error' message before it has reported 'ready'.
   * @returns {Promise<void>} - Resolves when the worker posts 'ready'
   */
  initWorker() {
    return new Promise((resolve, reject) => {
      try {
        // A repeated initialization must not leak the previous worker; its
        // outstanding requests can never be answered once it is terminated.
        if (this.worker) {
          this.worker.terminate();
          this._failInFlightGenerations(new Error('Kokoro worker restarted'));
        }
        this.workerReady = false;
        this.workerInitialized = false;

        this.worker = new Worker('/js/kokoro-worker.js');

        this.worker.onmessage = (e) => {
          const { type, result, error } = e.data;
          switch (type) {
            case 'ready':
              console.log('Kokoro worker is ready');
              this.workerReady = true;
              // Process any pending requests
              this.processPendingRequests();
              resolve();
              break;
            case 'initialized':
              console.log('Kokoro worker initialized');
              this.workerInitialized = true;
              break;
            case 'generated': {
              // NOTE(review): assumes the worker answers 'generate' requests in
              // the order they were posted — confirm against kokoro-worker.js.
              const request = this.inFlightGenerations.shift();
              if (request) {
                // Convert the ArrayBuffer back to Float32Array
                request.resolve({
                  audio: new Float32Array(result.audio),
                  sampling_rate: result.sampling_rate
                });
              }
              break;
            }
            case 'error': {
              console.error('Kokoro worker error:', error);
              const failure = new Error(error);
              const request = this.inFlightGenerations.shift();
              if (request) {
                request.reject(failure);
              } else if (!this.workerReady) {
                // Failure during start-up: fail initialization instead of
                // waiting forever for a 'ready' that may never come.
                reject(failure);
              }
              break;
            }
            default:
              console.warn('Unknown message type from worker:', type);
          }
        };

        this.worker.onerror = (error) => {
          console.error('Kokoro worker error:', error);
          this.workerReady = false;
          this._failInFlightGenerations(error);
          reject(error); // no-op once the promise has already settled
        };

        this.worker.postMessage({
          type: 'init'
        });
      } catch (error) {
        console.error('Failed to initialize Kokoro worker:', error);
        reject(error);
      }
    });
  }

  /**
   * Send queued generation requests to the worker once it is ready.
   * Requests are dispatched one at a time, 10 ms apart.
   */
  processPendingRequests() {
    if (this.pendingWorkerRequests.length > 0 && this.workerReady) {
      const request = this.pendingWorkerRequests.shift();
      this.generateInWorker(request.text, request.options)
        .then(request.resolve)
        .catch(request.reject);
      // Process the next request after a small delay to keep UI responsive
      if (this.pendingWorkerRequests.length > 0) {
        setTimeout(() => this.processPendingRequests(), 10);
      }
    }
  }

  /**
   * Generate speech in the worker. If the worker is not ready the request is
   * queued and sent by processPendingRequests() once it is.
   * @param {string} text - Text to convert to speech
   * @param {Object} options - Voice options ({ voice, speed })
   * @returns {Promise<Object>} - Resolves with { audio: Float32Array, sampling_rate }
   */
  generateInWorker(text, options) {
    return new Promise((resolve, reject) => {
      if (!this.worker || !this.workerReady) {
        this.pendingWorkerRequests.push({ text, options, resolve, reject });
        return;
      }

      const request = { resolve, reject };
      this.inFlightGenerations.push(request);
      try {
        this.worker.postMessage({
          type: 'generate',
          data: {
            text,
            voice: options.voice,
            speed: options.speed
          }
        });
      } catch (postError) {
        // The message never reached the worker, so no response will arrive:
        // drop the entry to keep the response queue aligned.
        const index = this.inFlightGenerations.indexOf(request);
        if (index !== -1) {
          this.inFlightGenerations.splice(index, 1);
        }
        reject(postError);
      }
    });
  }

  /**
   * Load the Kokoro module via dynamic import and publish it as window.kokoro.
   * Rejects when the import fails or the module exposes none of the
   * recognized entry points (KokoroTTS, TextSplitterStream, pipeline).
   * @returns {Promise<void>} - Resolves when script is loaded
   */
  async loadKokoroScript() {
    // Check if already loaded
    if (this.scriptLoaded || typeof window.kokoro !== 'undefined') {
      this.scriptLoaded = true;
      return;
    }

    let module;
    try {
      module = await import('/js/kokoro-js.js');
      this.scriptLoaded = true;
      console.log("Kokoro module structure:", Object.keys(module));
      // Store the module in window.kokoro
      window.kokoro = module;
      this._reportKokoroProgress(30, "Kokoro script loaded");
      console.log("Kokoro script loaded successfully via dynamic import");
    } catch (error) {
      console.error("Error importing Kokoro module:", error);
      throw error;
    }

    if (module.KokoroTTS) {
      console.log("Found KokoroTTS class in module");
      return;
    }
    if (module.TextSplitterStream && typeof module.TextSplitterStream === 'function') {
      console.log("Found TextSplitterStream in module, this may be the correct format");
      return;
    }
    if (module.pipeline || (module.default && module.default.pipeline)) {
      console.log("Found pipeline in module, using Xenova/kokoro-tts format");
      window.kokoroTTS = module.default || module;
      this.useLegacyFormat = true;
      return;
    }

    console.error("Kokoro module found but couldn't locate TTS constructor");
    throw new Error('Kokoro TTS API not found in module');
  }

  /**
   * Locate the TTS API in the loaded module and create the model instance
   * (this.kokoro). Failures are logged and reported as `false`, never thrown.
   * @returns {Promise<boolean>} - True if API is found and the model loaded
   */
  async checkKokoroApi() {
    if (this.useLegacyFormat && window.kokoroTTS) {
      // Handle legacy format (Xenova/kokoro-tts)
      try {
        this._reportKokoroProgress(40, "Loading Kokoro model");
        this.kokoro = await window.kokoroTTS.pipeline('text-to-speech', {
          quantized: true,
          progress_callback: (progress) => {
            // Parenthesized so a missing callback is never invoked at 100%.
            if (this.progressCallback && (progress % 10 === 0 || progress === 100)) {
              const message = `Loading Kokoro model: ${progress}%`;
              this.progressCallback(40 + (progress * 0.5), message);
            }
          }
        });
        this.legacySpeak = async (text, options) => {
          return await this.kokoro(text, options);
        };
        this._reportKokoroProgress(90, "Kokoro model loaded");
        this.isReady = !!this.kokoro;
        return this.isReady;
      } catch (error) {
        console.error("Failed to initialize legacy Kokoro pipeline:", error);
        return false;
      }
    }

    if (!window.kokoro) {
      return false;
    }

    // Handle newer kokoro-js format
    try {
      if (!window.kokoro.KokoroTTS) {
        console.error("Could not find compatible API in Kokoro module");
        return false;
      }

      console.log("Using KokoroTTS class from module");
      this._reportKokoroProgress(40, "Creating Kokoro TTS instance");
      const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
      try {
        console.log("Starting Kokoro model initialization - this may take some time");
        this._reportKokoroProgress(50, "Loading Kokoro model files");
        // Use the from_pretrained method to load the model
        this.kokoro = await window.kokoro.KokoroTTS.from_pretrained(model_id, {
          dtype: "fp32", // Using fp32 for better quality
          device: "wasm", // Using wasm for compatibility
          // NOTE(review): kokoro-js documents `progress_callback` rather than
          // `onProgress` — confirm which option the bundled build honors.
          // This handler treats `progress` as a 0..1 fraction.
          onProgress: (progress) => {
            if (this.progressCallback) {
              const scaledProgress = 40 + (progress * 50);
              const intProgress = Math.floor(progress * 100);
              if (intProgress % 10 === 0 || intProgress === 100) {
                const message = `Loading Kokoro model: ${intProgress}%`;
                this.progressCallback(Math.min(90, scaledProgress), message);
              }
            }
          }
        });
        console.log("Kokoro model initialization complete");
        this._reportKokoroProgress(90, "Kokoro model loaded");
        // Available voices from the library
        this.availableVoices = [
          'af_heart', 'af_alloy', 'af_aoede', 'af_bella', 'af_jessica',
          'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah',
          'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir',
          'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa',
          'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis', 'bf_alice',
          'bf_lily', 'bm_daniel', 'bm_fable'
        ];
        console.log("Using available voices:", this.availableVoices);
        this.isReady = true;
        return true;
      } catch (modelError) {
        console.error("Failed to initialize Kokoro model:", modelError);
        return false;
      }
    } catch (error) {
      console.error("Failed to initialize Kokoro instance:", error);
      return false;
    }
  }

  /**
   * Check if Kokoro TTS is available
   * @returns {boolean} - True if Kokoro is ready to use
   */
  isAvailable() {
    return this.isReady && this.kokoro !== null;
  }

  /**
   * Check if voice is currently speaking (an utterance is being generated or played)
   * @returns {boolean} - True if speaking
   */
  isSpeaking() {
    return this.currentUtterance !== null;
  }

  /**
   * Speak text using Kokoro TTS. Any utterance still being generated or
   * played is stopped first. The callback is invoked exactly once per call:
   * with a status/error string, or with no argument if the utterance was
   * stopped or superseded.
   * @param {string} text - The text to speak
   * @param {Function} callback - Called when speech completes, fails or is stopped
   * @returns {Promise<void>} - Resolves once playback has been set up (not when it ends)
   */
  async speak(text, callback = null) {
    if (!this.isAvailable() || !text) {
      if (callback) callback("TTS not available or no text provided");
      return;
    }
    console.log(`Attempting to speak: "${text}" with voice: ${this.voiceOptions.voice}`);
    // Stop any current speech
    this.stop();

    let utterance = null;
    try {
      // Process text for better TTS quality
      const processedText = this.processText(text);
      utterance = {
        text: processedText,
        options: { ...this.voiceOptions },
        onComplete: callback
      };
      this.currentUtterance = utterance;

      if (!(window.kokoro.KokoroTTS && this.kokoro)) {
        console.error("Kokoro TTS not properly initialized");
        this._finishKokoroUtterance(utterance, false, "Kokoro not initialized");
        return;
      }

      console.log(`Generating audio for text: "${processedText}" with voice: ${this.voiceOptions.voice}`);
      try {
        const audioData = await this.generateInWorker(processedText, {
          voice: this.voiceOptions.voice,
          speed: this.voiceOptions.speed || 1.0
        });

        // stop() or a newer speak() ran while the worker was busy: discard
        // the result rather than playing audio nobody asked for any more.
        if (this.currentUtterance !== utterance) {
          this._finishKokoroUtterance(utterance, false);
          return;
        }
        if (!audioData) {
          throw new Error("Worker generated no audio data");
        }
        this._playKokoroAudio(utterance, audioData);
      } catch (genError) {
        if (utterance.finished) return; // already stopped; callback has run
        console.error("Error generating audio:", genError);
        this._finishKokoroUtterance(utterance, false, "Generate error: " + genError.message);
      }
    } catch (error) {
      console.error('Error speaking with Kokoro TTS:', error);
      if (utterance) {
        this._finishKokoroUtterance(utterance, false, "Error: " + error.message);
      } else if (callback) {
        callback("Error: " + error.message);
      }
    }
  }

  /**
   * Wrap generated samples in a WAV blob and start playing them through an
   * Audio element attached to the utterance.
   * @param {Object} utterance - Utterance state created by speak()
   * @param {Object} audioData - { audio: Float32Array, sampling_rate: number }
   */
  _playKokoroAudio(utterance, audioData) {
    const wavBlob = this.float32ArrayToWav(audioData.audio, audioData.sampling_rate);
    const url = URL.createObjectURL(wavBlob);
    const audio = new Audio(url);
    // Stored so stop() can halt playback and release the object URL.
    utterance.url = url;
    utterance.audio = audio;

    audio.oncanplay = async () => {
      // One-shot: canplay may fire again after a seek, which must not
      // restart playback.
      audio.oncanplay = null;
      try {
        await audio.play();
      } catch (playError) {
        if (utterance.finished) return; // play() interrupted by stop()
        console.error("Error playing audio:", playError);
        this._finishKokoroUtterance(utterance, false, "Play error: " + playError.message);
      }
    };
    audio.onended = () => {
      this._finishKokoroUtterance(utterance, false, "Playback completed successfully");
    };
    audio.onerror = (error) => {
      console.error("Error playing Kokoro audio:", error);
      this._finishKokoroUtterance(
        utterance,
        false,
        "Audio error: " + (error.message || "unknown error")
      );
    };
  }

  /**
   * Detach the utterance's audio handlers, optionally halt playback, and
   * revoke its object URL.
   * @param {Object} utterance - Utterance state created by speak()
   * @param {boolean} haltPlayback - Pause and rewind the audio element as well
   */
  _releaseKokoroAudio(utterance, haltPlayback = false) {
    const { audio, url } = utterance;
    if (audio) {
      // Detach before rewinding so the seek cannot trigger canplay -> play().
      audio.oncanplay = null;
      audio.onended = null;
      audio.onerror = null;
      if (haltPlayback) {
        try {
          audio.pause();
          audio.currentTime = 0;
        } catch (error) {
          console.error('Error stopping Kokoro TTS:', error);
        }
      }
    }
    if (url) {
      URL.revokeObjectURL(url);
      utterance.url = null;
    }
  }

  /**
   * Complete an utterance exactly once: clear it from currentUtterance (if it
   * is still current), release its audio resources, then invoke its callback.
   * State is cleared before the callback so the callback may call speak().
   * @param {Object} utterance - Utterance state created by speak()
   * @param {boolean} haltPlayback - Pause and rewind the audio element as well
   * @param {...*} callbackArgs - Arguments forwarded to utterance.onComplete
   */
  _finishKokoroUtterance(utterance, haltPlayback, ...callbackArgs) {
    if (utterance.finished) return;
    utterance.finished = true;
    if (this.currentUtterance === utterance) {
      this.currentUtterance = null;
    }
    this._releaseKokoroAudio(utterance, haltPlayback);
    if (utterance.onComplete) {
      utterance.onComplete(...callbackArgs);
    }
  }

  /**
   * Stop any ongoing speech, whether it is still being generated or already
   * playing. The utterance's completion callback is invoked with no arguments.
   */
  stop() {
    if (this.currentUtterance) {
      this._finishKokoroUtterance(this.currentUtterance, true);
    }
  }

  /**
   * Set the speech rate/speed. Values are clamped to 0.5-2.0; values that are
   * not numbers (NaN after numeric conversion) are ignored.
   * @param {number} speed - Speech rate (0.5-2.0)
   */
  setSpeed(speed) {
    const requested = Number(speed);
    if (Number.isNaN(requested)) return;
    this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, requested));
  }

  /**
   * Set the voice to use. Empty or non-string values are ignored.
   * @param {string} voice - Voice identifier
   */
  setVoice(voice) {
    if (voice && typeof voice === 'string') {
      this.voiceOptions.voice = voice;
    }
  }

  /**
   * Get available voices
   * @returns {Promise<Array>} - Array of { id, name }; empty when Kokoro is unavailable
   */
  async getVoices() {
    if (!this.isAvailable()) return [];
    try {
      // Return the manually collected list of voices
      if (this.availableVoices && this.availableVoices.length > 0) {
        return this.availableVoices.map((id) => ({
          id,
          name: this.formatVoiceId(id)
        }));
      }
      // Fallback to hardcoded list if needed
      return [
        { id: 'af_heart', name: 'Heart (Female)' },
        { id: 'af_bella', name: 'Bella (Female)' },
        { id: 'am_michael', name: 'Michael (Male)' },
        { id: 'bf_emma', name: 'Emma (British Female)' },
        { id: 'bm_george', name: 'George (British Male)' }
      ];
    } catch (error) {
      console.error('Error getting Kokoro voices:', error);
      return [];
    }
  }

  /**
   * Format a voice ID into a readable name.
   *  - "en_us_female" (three or more parts) -> "English (US) - Female"
   *  - "af_heart" / "bm_george" -> "Heart (Female)" / "George (British Male)",
   *    matching the naming used by the getVoices() fallback list
   *  - anything else -> underscores replaced by spaces, words capitalized
   * @param {string} voiceId - The voice ID to format
   * @returns {string} - Formatted voice name
   */
  formatVoiceId(voiceId) {
    if (!voiceId) return "Unknown Voice";
    const capitalizeWords = (value) => value.replace(/\b\w/g, (letter) => letter.toUpperCase());

    const parts = voiceId.split('_');
    if (parts.length >= 3) {
      const languageMap = {
        'en': 'English',
        'fr': 'French',
        'de': 'German',
        'es': 'Spanish',
        'it': 'Italian',
        'pt': 'Portuguese',
        'pl': 'Polish',
        'ja': 'Japanese',
        'ko': 'Korean',
        'zh': 'Chinese',
        'ru': 'Russian'
      };
      const langName = languageMap[parts[0]] || parts[0].toUpperCase();
      const region = parts[1].toUpperCase();
      const voiceType = capitalizeWords(parts.slice(2).join(' '));
      return `${langName} (${region}) - ${voiceType}`;
    }

    // Two-letter prefix: first letter a/b selects the accent label, second
    // letter f/m the gender, as in the getVoices() fallback names.
    const prefixed = /^([ab])([fm])_(.+)$/.exec(voiceId);
    if (prefixed) {
      const [, accent, gender, name] = prefixed;
      const accentLabel = accent === 'b' ? 'British ' : '';
      const genderLabel = gender === 'f' ? 'Female' : 'Male';
      return `${capitalizeWords(name)} (${accentLabel}${genderLabel})`;
    }

    // For other naming formats, capitalize words
    return capitalizeWords(voiceId.replace(/_/g, ' '));
  }

  /**
   * Process text for better TTS quality: collapse whitespace runs, turn "--"
   * into an em dash and "..." into an ellipsis character, then trim.
   * @param {string} text - Raw text
   * @returns {string} - Processed text ('' for empty input)
   */
  processText(text) {
    if (!text) return '';
    return text
      .replace(/\s+/g, ' ') // Normalize whitespace
      .replace(/--/g, '—') // Em dash
      .replace(/\.\.\./g, '…') // Ellipsis
      .trim();
  }

  /**
   * Convert a Float32Array to a mono, 32-bit IEEE-float WAV Blob.
   * @param {Float32Array} float32Array - The audio data as Float32Array
   * @param {number} sampleRate - The sample rate of the audio
   * @returns {Blob} - A WAV format Blob (type 'audio/wav')
   */
  float32ArrayToWav(float32Array, sampleRate = 24000) {
    const HEADER_BYTES = 44;
    const BYTES_PER_SAMPLE = 4;
    const dataBytes = float32Array.length * BYTES_PER_SAMPLE;

    const buffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
    const view = new DataView(buffer);

    // "RIFF" chunk descriptor
    this.writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + dataBytes, true); // File size minus the first 8 bytes
    this.writeString(view, 8, 'WAVE');
    // "fmt " sub-chunk
    this.writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true); // Subchunk1Size
    view.setUint16(20, 3, true); // AudioFormat (3 for IEEE float)
    view.setUint16(22, 1, true); // NumChannels (1 for mono)
    view.setUint32(24, sampleRate, true); // SampleRate
    view.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // ByteRate
    view.setUint16(32, BYTES_PER_SAMPLE, true); // BlockAlign
    view.setUint16(34, 32, true); // BitsPerSample (32 bits for float)
    // "data" sub-chunk
    this.writeString(view, 36, 'data');
    view.setUint32(40, dataBytes, true); // Subchunk2Size

    // Samples are written explicitly little-endian, as WAV requires.
    for (let i = 0; i < float32Array.length; i++) {
      view.setFloat32(HEADER_BYTES + i * BYTES_PER_SAMPLE, float32Array[i], true);
    }

    return new Blob([buffer], { type: 'audio/wav' });
  }

  /**
   * Write a string to a DataView, one byte per UTF-16 code unit
   * @param {DataView} view - The DataView to write to
   * @param {number} offset - The offset to write at
   * @param {string} string - The string to write
   */
  writeString(view, offset, string) {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  }

  /**
   * Clean up resources when this handler is no longer needed: stops speech,
   * terminates the worker and rejects every generation request that can no
   * longer be answered.
   */
  dispose() {
    this.stop();
    // Terminate the worker if it exists
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    this.workerReady = false;
    this.workerInitialized = false;

    // Settle outstanding promises so no caller waits forever.
    const disposed = new Error('Kokoro handler disposed');
    this._failInFlightGenerations(disposed);
    const queued = this.pendingWorkerRequests.splice(0);
    for (const request of queued) {
      request.reject(disposed);
    }

    // Clean up other resources
    this.kokoro = null;
    this.isReady = false;
  }
}