Fix Kokoro TTS integration issues: Remove API key requirement and ensure system-specific options display correctly

This commit is contained in:
2025-04-05 22:06:22 +00:00
parent e5a3016846
commit fc693ae695
11 changed files with 3296 additions and 596 deletions
+270
View File
@@ -0,0 +1,270 @@
/**
* ElevenLabsTTSModule
* Provides TTS via ElevenLabs API
*/
import { ApiTTSModuleBase } from './api-tts-module-base.js';
export class ElevenLabsTTSModule extends ApiTTSModuleBase {
  /**
   * Create the module with its default voice options.
   * Passes the id 'elevenlabs' and display name 'ElevenLabs TTS' to the base class.
   */
  constructor() {
    super('elevenlabs', 'ElevenLabs TTS');
    // Voice options specific to ElevenLabs
    this.voiceOptions = {
      voice: 'pNInz6obpgDQGcFmaJgB', // Default voice ID for ElevenLabs
      model: 'eleven_multilingual_v2', // Use the multilingual model
      speed: 1.0
    };
  }

  /**
   * Initialize the ElevenLabs TTS module.
   *
   * Steps: initialize the parent, look up the persistence manager, require a
   * stored API key, load the voice list, then restore the stored voice, model
   * and speed preferences from the 'elevenlabs' preference namespace.
   *
   * @returns {Promise<boolean>} - Resolves with true on success; false when the
   *   parent fails, the persistence manager or API key is missing, or any step throws
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing ElevenLabs TTS');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('ElevenLabs TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('ElevenLabs TTS: Required dependency persistence-manager not found');
        return false;
      }

      // Check for API key
      const apiKey = persistenceManager.getPreference('elevenlabs', 'api_key', '');
      if (!apiKey) {
        console.error('ElevenLabs TTS: API key not configured');
        return false;
      }

      // Load voices from ElevenLabs
      try {
        this.reportProgress(50, 'Loading ElevenLabs voices');
        await this.loadVoices(apiKey);
      } catch (error) {
        console.error('ElevenLabs TTS: Failed to load voices:', error);
        return false;
      }

      // Restore preferences; the current option values act as defaults.
      const preferredVoice = persistenceManager.getPreference('elevenlabs', 'voice', this.voiceOptions.voice);
      if (preferredVoice) {
        this.voiceOptions.voice = preferredVoice;
      }

      const preferredModel = persistenceManager.getPreference('elevenlabs', 'model', this.voiceOptions.model);
      if (preferredModel) {
        this.voiceOptions.model = preferredModel;
      }

      // A stored speed gets the same range limit that setVoiceOptions applies,
      // so a stale or hand-edited preference cannot bypass it.
      const preferredSpeed = persistenceManager.getPreference('elevenlabs', 'speed', this.voiceOptions.speed);
      const restoredSpeed = clampElevenLabsSpeed(preferredSpeed);
      if (restoredSpeed !== null) {
        this.voiceOptions.speed = restoredSpeed;
      }

      this.isReady = true;
      this.reportProgress(100, 'ElevenLabs TTS initialized');
      return true;
    } catch (error) {
      console.error('ElevenLabs TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Get the default API base URL for ElevenLabs
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.elevenlabs.io/v1';
  }

  /**
   * Load available voices from the ElevenLabs API.
   *
   * A built-in fallback list is installed first and is only replaced when the
   * API answers with a non-empty `voices` array. HTTP errors, network errors
   * and malformed responses are logged and leave the fallback list in place.
   *
   * @param {string} apiKey - API key for authentication; when falsy no request is made
   * @returns {Promise<boolean>} - Always resolves with true
   */
  async loadVoices(apiKey) {
    // Fallback voices used when the API is not queried or the query fails.
    // The IDs are unchanged; the display names are corrected to the premade
    // voices those IDs belong to.
    this.voices = [
      { id: 'pNInz6obpgDQGcFmaJgB', name: 'Adam', language: 'en' },
      { id: '21m00Tcm4TlvDq8ikWAM', name: 'Rachel', language: 'en' },
      { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Domi', language: 'en' },
      { id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
      { id: 'ErXwobaYiN019PkySvjV', name: 'Antoni', language: 'en' }
    ];

    // Only load from API if we have an API key
    if (!apiKey) {
      return true;
    }

    try {
      // NOTE(review): this.apiBaseUrl is not assigned in this class; presumably
      // the base class sets it (see getDefaultApiBaseUrl) - confirm.
      const response = await fetch(`${this.apiBaseUrl}/voices`, {
        method: 'GET',
        headers: {
          'xi-api-key': apiKey,
          'Content-Type': 'application/json'
        }
      });

      if (!response.ok) {
        console.error(`ElevenLabs TTS: API error: ${response.status} ${response.statusText}`);
        return true; // Use defaults, but don't fail initialization
      }

      const data = await response.json();
      // An empty list would leave the module with no selectable voices, so the
      // fallback list is kept in that case.
      if (data && Array.isArray(data.voices) && data.voices.length > 0) {
        // Transform API response to our internal format
        this.voices = data.voices.map(voice => ({
          id: voice.voice_id,
          name: voice.name,
          language: 'en', // Language is not read from the response; every voice is tagged 'en'
          preview: voice.preview_url
        }));
      }
    } catch (error) {
      console.error('ElevenLabs TTS: Error loading voices:', error);
    }

    // Success is reported either way because a usable voice list is always present
    return true;
  }

  /**
   * Select a voice for the given locale.
   *
   * The voice list carries no usable per-voice language information, so the
   * locale is ignored and the default voice is selected.
   *
   * @param {string} locale - Locale code (currently unused)
   * @returns {boolean} - Result of selectDefaultVoice()
   */
  selectVoiceForLocale(locale) {
    return this.selectDefaultVoice();
  }

  /**
   * Generate speech audio data using the ElevenLabs API.
   *
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` on
   *   success; otherwise `{ success: false, reason, error? }`. Never rejects.
   */
  async generateSpeechAudio(text) {
    // Don't attempt to call the API if no API key is set or text is empty.
    // NOTE(review): this.apiKey is not assigned in this class; presumably the
    // base class populates it - confirm.
    if (!text || !this.apiKey) {
      return { success: false, reason: 'missing_api_key_or_text' };
    }

    try {
      // Process the text
      const processedText = this.preprocessText(text);

      // Create request payload
      // NOTE(review): the accepted range for voice_settings.speed may be
      // narrower than the 0.5-2.0 this module allows - confirm against the API docs.
      const payload = {
        text: processedText,
        model_id: this.voiceOptions.model || 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.0,
          use_speaker_boost: true,
          speed: this.voiceOptions.speed || 1.0
        }
      };

      // The voice ID comes from stored preferences / caller options, so it is
      // encoded before being placed in the URL path.
      const voiceId = encodeURIComponent(this.voiceOptions.voice);

      // Make API request
      // NOTE(review): the audio format may be chosen by an output_format query
      // parameter rather than this Accept header - confirm the returned format.
      const response = await fetch(`${this.apiBaseUrl}/text-to-speech/${voiceId}?optimize_streaming_latency=0`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': this.apiKey,
          'Accept': 'audio/wav'
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        throw new Error(`API error: ${response.status} ${response.statusText}`);
      }

      // Read the body as an ArrayBuffer for consistency with other modules
      const arrayBuffer = await response.arrayBuffer();
      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('ElevenLabs TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options and persist them.
   *
   * Values are saved under the 'elevenlabs' preference namespace, which is
   * where initialize() reads them back. Voice and model are additionally
   * written to their earlier 'tts' / 'elevenlabs_*' keys so existing readers
   * of those keys keep working.
   *
   * @param {Object} options - Voice options
   * @param {string} [options.voice] - Voice ID
   * @param {number} [options.speed] - Speech speed; limited to 0.5-2.0, NaN is ignored
   * @param {string} [options.model] - Model ID
   */
  setVoiceOptions(options = {}) {
    const savePreference = (key, value, legacyKey) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        return;
      }
      persistenceManager.updatePreference('elevenlabs', key, value);
      if (legacyKey) {
        persistenceManager.updatePreference('tts', legacyKey, value);
      }
    };

    if (options.voice) {
      this.voiceOptions.voice = options.voice;
      savePreference('voice', options.voice, 'elevenlabs_voice');
    }

    const speed = clampElevenLabsSpeed(options.speed);
    if (speed !== null) {
      this.voiceOptions.speed = speed;
      savePreference('speed', speed);
    }

    // Handle ElevenLabs-specific options
    if (options.model) {
      this.voiceOptions.model = options.model;
      savePreference('model', options.model, 'elevenlabs_model');
    }
  }
}

/**
 * Limit a speed value to the 0.5-2.0 range used by ElevenLabsTTSModule.
 * @param {*} speed - Candidate speed value
 * @returns {number|null} - The limited speed, or null when the value is not a
 *   number or is NaN
 */
function clampElevenLabsSpeed(speed) {
  if (typeof speed !== 'number' || Number.isNaN(speed)) {
    return null;
  }
  return Math.max(0.5, Math.min(2.0, speed));
}
// Register the module with the module registry.
// The module registry MUST be accessed via window, not a direct import.
// `typeof window` is checked first so that evaluating this file where no
// `window` global exists reports the missing registry below instead of
// aborting with a ReferenceError.
if (typeof window !== 'undefined' && window.moduleRegistry) {
  try {
    // Create instance first, then register it
    const elevenLabsTTSModule = new ElevenLabsTTSModule();
    window.moduleRegistry.register(elevenLabsTTSModule);
    console.log('ElevenLabs TTS Module registered successfully');
  } catch (err) {
    console.error('Failed to register ElevenLabs TTS Module:', err);
  }
} else {
  console.error('Module registry not available when attempting to register ElevenLabs TTS Module');
}