333 lines
11 KiB
JavaScript
333 lines
11 KiB
JavaScript
/**
 * ElevenLabs TTS Handler
 *
 * Provides text-to-speech (TTS) via the ElevenLabs API.
 */
|
|
import { ApiTTSHandlerBase } from './api-tts-handler-base.js';
|
|
import { moduleRegistry } from './module-registry.js';
|
|
|
|
// Voice ID used whenever no other voice has been selected.
const ELEVENLABS_DEFAULT_VOICE_ID = 'pNInz6obpgDQGcFmaJgB';

// Model used whenever no model preference has been stored or supplied.
const ELEVENLABS_DEFAULT_MODEL_ID = 'eleven_multilingual_v2';

// Voices offered when the account's voice list cannot be fetched.
// The first entry is the default voice, so selectDefaultVoice() keeps
// resolving to the same ID whether or not the API was reachable.
const ELEVENLABS_FALLBACK_VOICES = Object.freeze([
  { id: ELEVENLABS_DEFAULT_VOICE_ID, name: 'Adam', language: 'en' },
  { id: '21m00Tcm4TlvDq8ikWAM', name: 'Rachel', language: 'en' },
  { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Domi', language: 'en' },
  { id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
  { id: 'ErXwobaYiN019PkySvjV', name: 'Antoni', language: 'en' },
]);

/**
 * Reduce a locale or language tag to its lower-cased primary language
 * subtag, e.g. 'en-US' -> 'en', 'pt_BR' -> 'pt'.
 * @param {*} tag - Locale/language tag; anything that is not a string yields ''
 * @returns {string} - Primary language subtag, or '' when none can be derived
 */
function elevenLabsPrimaryLanguage(tag) {
  if (typeof tag !== 'string') {
    return '';
  }
  return tag.trim().split(/[-_]/)[0].toLowerCase();
}

/**
 * TTS handler backed by the ElevenLabs HTTP API.
 *
 * Relies on the base class for `apiKey`, `apiBaseUrl`, `available`,
 * `bindMethods()` and `getModule()`; this class adds voice discovery,
 * voice selection and the actual speech-synthesis request.
 */
export class ElevenLabsTTSHandler extends ApiTTSHandlerBase {
  constructor() {
    super('elevenlabs', 'ElevenLabs TTS');

    // Voice options specific to ElevenLabs
    this.voiceOptions = {
      voice: ELEVENLABS_DEFAULT_VOICE_ID,
      model: ELEVENLABS_DEFAULT_MODEL_ID, // Multilingual model
      speed: 1.0,
    };

    // Bind methods
    this.bindMethods([
      'initialize',
      'speak',
      'speakPreloaded',
      'preloadSpeech',
      'stop',
      'isAvailable',
      'getId',
      'getVoices',
      'setVoiceOptions',
      'getModule',
      'setupVoiceFromPreferences',
      'loadVoices',
      'selectVoiceForLocale',
      'selectDefaultVoice',
      'generateSpeechAudio',
      'getDefaultApiBaseUrl',
    ]);
  }

  /**
   * Initialize the ElevenLabs TTS handler.
   *
   * Runs the base-class initialization, installs the fallback voice list,
   * restores stored model/voice preferences, tries to fetch the account's
   * voices when an API key is present, and finally settles on a voice.
   * A stored voice preference wins over locale-based selection.
   *
   * @param {Function} progressCallback - Optional callback `(percent, message)` for progress updates
   * @returns {Promise<boolean>} - Resolves with success status; never rejects
   */
  async initialize(progressCallback = null) {
    const report = (percent, message) => {
      if (progressCallback) {
        progressCallback(percent, message);
      }
    };

    try {
      report(10, 'Initializing ElevenLabs TTS');

      // Call parent initialize method
      const initSuccess = await super.initialize(progressCallback);
      if (!initSuccess) {
        return false;
      }

      report(40, 'ElevenLabs TTS dependencies loaded');

      // Set default voices in case API call fails. A fresh copy is used so
      // later mutation of this.voices cannot touch the shared constant.
      const fallbackVoices = ELEVENLABS_FALLBACK_VOICES.map((voice) => ({ ...voice }));
      this.voices = fallbackVoices;

      // Load voice preferences
      let hasSavedVoice = false;
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        const model = persistenceManager.getPreference('tts', 'elevenlabs_model', ELEVENLABS_DEFAULT_MODEL_ID);
        if (model) {
          this.voiceOptions.model = model;
        }

        const voice = persistenceManager.getPreference('tts', 'elevenlabs_voice');
        if (voice) {
          this.voiceOptions.voice = voice;
          hasSavedVoice = true;
        }
      }

      report(60, 'ElevenLabs TTS preferences loaded');

      // Only attempt to load voices from API if we have an API key
      if (this.apiKey) {
        let loadedFromApi = false;
        try {
          await this.loadVoices();
          // loadVoices() reports request failures by returning true, so the
          // only reliable success signal is that it replaced the voice list.
          loadedFromApi = this.voices !== fallbackVoices;
        } catch (error) {
          console.error('ElevenLabs TTS: Error loading voices:', error);
        }

        if (loadedFromApi) {
          console.log(`ElevenLabs TTS: Loaded ${this.voices.length} voices from API`);
        } else {
          // Don't fail initialization, we already have default voices
          console.warn('ElevenLabs TTS: Could not load voices from API, using defaults');
        }
      } else {
        console.log('ElevenLabs TTS: No API key provided, using default voices');
        // Mark as available but not fully functional
        this.available = true;
      }

      report(80, `ElevenLabs TTS loaded ${this.voices.length} voices`);

      // Pick a voice from the locale only when the user has not stored one;
      // otherwise the stored preference would be silently discarded.
      if (!hasSavedVoice) {
        const localization = this.getModule('localization');
        if (localization) {
          const locale = localization.getLocale();
          console.log(`ElevenLabs TTS: Setting voice for locale: ${locale}`);
          this.selectVoiceForLocale(locale);
        } else {
          this.selectDefaultVoice();
        }
      }

      // Mark as ready even if we're using default voices
      this.isReady = true;

      report(100, 'ElevenLabs TTS initialized');
      return true;
    } catch (error) {
      console.error('ElevenLabs TTS: Initialization error:', error);
      report(100, `ElevenLabs TTS initialization failed - ${error.message}`);
      return false;
    }
  }

  /**
   * Get the default API base URL for ElevenLabs
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.elevenlabs.io/v1';
  }

  /**
   * Load available voices from the ElevenLabs API into `this.voices`.
   *
   * `this.voices` is replaced only when the response contains a `voices`
   * array; on any failure the existing list is left untouched.
   *
   * @returns {Promise<boolean>} - `true` when voices were loaded, and also
   *   when there is no API key or the request failed (so the handler stays
   *   selectable for configuration); `false` when the response had no
   *   `voices` array
   */
  async loadVoices() {
    if (!this.apiKey) {
      console.log('ElevenLabs TTS: No API key provided, skipping voice loading');
      // Return true to indicate initialization was successful, even without voices
      // This allows the handler to appear in the dropdown for configuration
      return true;
    }

    try {
      const response = await fetch(`${this.apiBaseUrl}/voices`, {
        method: 'GET',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': this.apiKey,
        },
      });

      if (!response.ok) {
        throw new Error(`API error: ${response.status} ${response.statusText}`);
      }

      const data = await response.json();
      if (!Array.isArray(data?.voices)) {
        return false;
      }

      this.voices = data.voices.map((voice) => ({
        id: voice.voice_id,
        name: voice.name,
        language: voice.labels?.language || 'unknown',
      }));
      return true;
    } catch (error) {
      console.error('ElevenLabs TTS: Error loading voices:', error);
      return true; // Still return true to allow the handler to be configured
    }
  }

  /**
   * Select a voice for the given locale.
   *
   * Matching is done on the primary language subtag of both the locale and
   * the voice's language label, so 'en_GB' matches a voice labelled 'en-US'.
   * Falls back to the default voice when the locale is missing/invalid, when
   * no voices are loaded, or when nothing matches.
   *
   * @param {string} locale - Locale code (e.g. 'en-US')
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    if (!this.voices || this.voices.length === 0) {
      return this.selectDefaultVoice();
    }

    // Extract language code from locale (e.g., 'en-US' -> 'en')
    const langCode = elevenLabsPrimaryLanguage(locale);
    if (!langCode) {
      return this.selectDefaultVoice();
    }

    // Voices without a usable language label never match
    const matchingVoice = this.voices.find(
      (voice) =>
        Boolean(voice.language) &&
        voice.language !== 'unknown' &&
        elevenLabsPrimaryLanguage(voice.language) === langCode,
    );

    if (matchingVoice) {
      this.voiceOptions.voice = matchingVoice.id;
      return true;
    }

    // If no match, use default
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice: the first loaded voice, or the hardcoded
   * default voice ID when no voices are loaded.
   * @returns {boolean} - Success status (always true)
   */
  selectDefaultVoice() {
    const hasVoices = Boolean(this.voices) && this.voices.length > 0;
    this.voiceOptions.voice = hasVoices ? this.voices[0].id : ELEVENLABS_DEFAULT_VOICE_ID;
    return true;
  }

  /**
   * Generate speech audio data using the ElevenLabs API.
   *
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Blob|null>} - Audio blob labelled 'audio/wav', or
   *   `null` when the text or API key is empty or the request fails
   */
  async generateSpeechAudio(text) {
    // Don't attempt to call the API if no API key is set or text is empty
    if (!text || !this.apiKey || this.apiKey.trim() === '') {
      console.log('ElevenLabs TTS: No API key provided or empty text, skipping API call');
      return null;
    }

    try {
      // Create request payload
      const payload = {
        text,
        model_id: this.voiceOptions.model || ELEVENLABS_DEFAULT_MODEL_ID,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.0,
          use_speaker_boost: true,
          speed: this.voiceOptions.speed || 1.0,
        },
      };

      // The voice ID becomes a path segment, so escape it.
      const voiceId = encodeURIComponent(this.voiceOptions.voice);
      const response = await fetch(
        `${this.apiBaseUrl}/text-to-speech/${voiceId}?optimize_streaming_latency=0`,
        {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'xi-api-key': this.apiKey,
            'Accept': 'audio/wav',
          },
          body: JSON.stringify(payload),
        },
      );

      if (!response.ok) {
        throw new Error(`API error: ${response.status} ${response.statusText}`);
      }

      // Get audio blob from response
      const audioBlob = await response.blob();

      // Re-wrap so the blob always reports type 'audio/wav'.
      // NOTE(review): no output_format is requested here, so the bytes are
      // presumably whatever the API returns by default (likely MP3) - confirm
      // that downstream playback tolerates the 'audio/wav' label.
      return new Blob([audioBlob], { type: 'audio/wav' });
    } catch (error) {
      console.error('ElevenLabs TTS: Error generating speech:', error);
      return null;
    }
  }

  /**
   * Get available voices, loading them from the API if none are cached.
   * @returns {Promise<Array>} - Resolves with array of voice objects; empty
   *   when the handler is not available or loading fails
   */
  async getVoices() {
    if (!this.available) {
      return [];
    }

    // If voices are already loaded, return them
    if (this.voices && this.voices.length > 0) {
      return this.voices;
    }

    // Otherwise try to load voices
    try {
      await this.loadVoices();
      return this.voices || [];
    } catch (error) {
      console.error('ElevenLabs TTS: Error getting voices:', error);
      return [];
    }
  }

  /**
   * Set voice options. Common options are delegated to the base class; a
   * supplied `model` is applied here and stored as the 'elevenlabs_model'
   * preference when a persistence manager is available.
   * @param {Object} options - Voice options
   */
  setVoiceOptions(options = {}) {
    // Call parent method for common options
    super.setVoiceOptions(options);

    // Handle ElevenLabs-specific options
    if (!options.model) {
      return;
    }

    this.voiceOptions.model = options.model;

    // Save the model preference
    const persistenceManager = this.getModule('persistence-manager');
    if (persistenceManager) {
      persistenceManager.updatePreference('tts', 'elevenlabs_model', options.model);
    }
  }
}
|
|
|
|
// Create the singleton instance
|
|
// Constructed eagerly when this module is evaluated. The constructor only
// sets default voice options and calls bindMethods(); network access is
// deferred until initialize() is called.
const ElevenLabsTTS = new ElevenLabsTTSHandler();
|