/**
 * ElevenLabsTTSModule
 * Provides TTS via ElevenLabs API
 */

import { ApiTTSModuleBase } from './api-tts-module-base.js';
/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 *
 * Relies on members that are not defined in this class and are therefore
 * expected to come from ApiTTSModuleBase: `id`, `apiKey`, `apiBaseUrl`,
 * `getModule()`, `reportProgress()`, `preprocessText()` and
 * `selectDefaultVoice()`.
 */
export class ElevenLabsTTSModule extends ApiTTSModuleBase {
  constructor() {
    super('elevenlabs-tts', 'ElevenLabs TTS');

    // Provider defaults; initialize() and setVoiceOptions() may override them.
    this.voiceOptions = {
      voice: 'pNInz6obpgDQGcFmaJgB', // default ElevenLabs voice ID
      model: 'eleven_multilingual_v2', // multilingual model
      speed: 1.0 // app-level speed (0.5-2.0); converted per request by getApiSpeed()
    };
  }

  /**
   * Initialize the ElevenLabs TTS module.
   *
   * Returns `false` only for hard failures (parent initialization failed, the
   * persistence manager is missing, or an unexpected exception). A missing API
   * key or a failed voice load leaves the module registered but not ready:
   * `isReady` is set to `false` and the method still resolves with `true`.
   *
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing ElevenLabs TTS');

      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('ElevenLabs TTS: Parent initialization failed');
        return false;
      }

      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('ElevenLabs TTS: Required dependency persistence-manager not found');
        return false;
      }

      // The API key is expected to have been loaded by the parent initialize().
      if (!this.apiKey) {
        console.info('ElevenLabs TTS: API key not configured; provider unavailable until configured');
        this.isReady = false;
        this.reportProgress(100, 'ElevenLabs TTS not configured');
        return true;
      }

      // Both "loadVoices resolved false" and "loadVoices threw" end in the same
      // not-ready state, including the final progress report.
      let voicesLoaded = false;
      try {
        this.reportProgress(50, 'Loading ElevenLabs voices');
        voicesLoaded = await this.loadVoices(this.apiKey);
      } catch (error) {
        console.error('ElevenLabs TTS: Failed to load voices:', error);
      }
      if (!voicesLoaded) {
        this.isReady = false;
        this.reportProgress(100, 'ElevenLabs TTS not ready');
        return true;
      }

      // Restore saved preferences. Voice and model use per-module keys
      // (`${this.id}_voice`, `${this.id}_model`); speed uses the shared 'speed' key.
      const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
      if (preferredVoice) {
        this.voiceOptions.voice = preferredVoice;
      }

      const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
      if (preferredModel) {
        this.voiceOptions.model = preferredModel;
      }

      const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
      if (typeof preferredSpeed === 'number') {
        this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
      }

      this.isReady = true;
      this.reportProgress(100, 'ElevenLabs TTS initialized');
      return true;
    } catch (error) {
      console.error('ElevenLabs TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Get the default API base URL for ElevenLabs.
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.elevenlabs.io/v1';
  }

  /**
   * Load available voices from the ElevenLabs API into `this.voices`.
   *
   * A built-in fallback list is installed first. It is replaced only when the
   * API returns a non-empty `voices` array, so an empty or malformed payload
   * never leaves the module without any selectable voice.
   *
   * @param {string} apiKey - API key for authentication
   * @returns {Promise<boolean>} - `false` on HTTP or network/parse errors, otherwise
   *   whether at least one voice is available
   */
  async loadVoices(apiKey) {
    // NOTE(review): `name` is only a display label for the fallback entries;
    // verify the labels against the ElevenLabs voice catalogue for these IDs.
    this.voices = [
      { id: 'pNInz6obpgDQGcFmaJgB', name: 'Rachel', language: 'en' },
      { id: '21m00Tcm4TlvDq8ikWAM', name: 'Adam', language: 'en' },
      { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Antoni', language: 'en' },
      { id: 'EXAVITQu4vr4xnSDxMaL', name: 'Bella', language: 'en' },
      { id: 'ErXwobaYiN019PkySvjV', name: 'Daniel', language: 'en' }
    ];

    // Without an API key there is nothing to fetch; the fallbacks stand.
    if (!apiKey) {
      return true;
    }

    try {
      const response = await fetch(`${this.apiBaseUrl}/voices`, {
        method: 'GET',
        headers: {
          'Accept': 'application/json',
          'xi-api-key': apiKey
        }
      });

      if (!response.ok) {
        console.error(`ElevenLabs TTS: API error ${response.status} ${response.statusText}`);
        return false;
      }

      const data = await response.json();
      const apiVoices = data && Array.isArray(data.voices) ? data.voices : [];

      if (apiVoices.length > 0) {
        // Map API voices to the module's voice format.
        this.voices = apiVoices.map(voice => ({
          id: voice.voice_id,
          name: voice.name,
          language: voice.language || 'en',
          gender: 'unknown',
          preview_url: voice.preview_url
        }));
      }

      return this.voices.length > 0;
    } catch (error) {
      console.error('ElevenLabs TTS: Error loading voices:', error);
      return false;
    }
  }

  /**
   * Select a voice for the given locale.
   *
   * English locales pick the module's default voice ID when it is present in
   * `this.voices`; every other case defers to `selectDefaultVoice()`.
   *
   * @param {string} locale - Locale code such as 'en-US' or 'en_US'
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    const defaultVoiceId = 'pNInz6obpgDQGcFmaJgB';

    // Extract the language part ('en-US' / 'en_US' -> 'en'); a non-string
    // locale yields '' and falls through to the default selection.
    const langCode = typeof locale === 'string'
      ? locale.split(/[-_]/)[0].toLowerCase()
      : '';

    if (langCode === 'en' && Array.isArray(this.voices)) {
      const defaultVoice = this.voices.find(v => v.id === defaultVoiceId);
      if (defaultVoice) {
        this.voiceOptions.voice = defaultVoice.id;
        return true;
      }
    }

    return this.selectDefaultVoice();
  }

  /**
   * Generate speech audio data using the ElevenLabs API.
   *
   * Never rejects: failures are reported through the returned object.
   *
   * @param {string} text - Text to generate speech for
   * @param {Object} [options] - Request options
   * @param {AbortSignal} [options.signal] - Signal forwarded to fetch()
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }` on success,
   *   otherwise `{ success: false, reason, error? }`
   */
  async generateSpeechAudio(text, options = {}) {
    // Don't call the API without an API key or with empty text.
    if (!text || !this.apiKey) {
      return { success: false, reason: 'missing_api_key_or_text' };
    }

    try {
      const payload = {
        text: this.preprocessText(text),
        model_id: this.voiceOptions.model || 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0.0,
          use_speaker_boost: true,
          speed: this.getApiSpeed(this.voiceOptions.speed)
        }
      };

      // The voice ID can come from stored preferences or caller options, so it
      // is encoded before being placed in the URL path.
      const voiceId = encodeURIComponent(this.voiceOptions.voice);
      const signal = options ? options.signal : undefined;

      const response = await fetch(`${this.apiBaseUrl}/text-to-speech/${voiceId}?optimize_streaming_latency=0`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': this.apiKey,
          'Accept': 'audio/mpeg'
        },
        body: JSON.stringify(payload),
        signal
      });

      if (!response.ok) {
        const errorText = await response.text();
        console.error(`ElevenLabs API error ${response.status}: ${errorText}`);
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      // Read the body straight into an ArrayBuffer (no intermediate Blob).
      const audioData = await response.arrayBuffer();

      return {
        success: true,
        audioData
      };
    } catch (error) {
      console.error('ElevenLabs TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options.
   *
   * Voice and model selections are persisted under `${this.id}_voice` and
   * `${this.id}_model`, the same keys initialize() reads, so a saved choice is
   * restored on the next start. Speed is applied in memory only.
   *
   * @param {Object} options - Voice options (`voice`, `model`, `speed`)
   */
  setVoiceOptions(options = {}) {
    if (!options) {
      return;
    }

    // Persisting is best-effort: skipped when no persistence manager is registered.
    const persist = (name, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', `${this.id}_${name}`, value);
      }
    };

    if (options.voice) {
      this.voiceOptions.voice = options.voice;
      persist('voice', options.voice);
    }

    if (typeof options.speed === 'number') {
      this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
    }

    // ElevenLabs-specific option.
    if (options.model) {
      this.voiceOptions.model = options.model;
      persist('model', options.model);
    }
  }

  /**
   * Convert an app-level speed into the value sent as `voice_settings.speed`.
   *
   * Piecewise linear: app 0.5-1.0 maps to 0.7-1.0 and app 1.0-2.0 maps to
   * 1.0-1.2 (presumably the range the API accepts; confirm against the
   * ElevenLabs documentation).
   *
   * @param {number} speed - App-level speed
   * @returns {number} - Speed value for the API payload
   */
  getApiSpeed(speed) {
    const appSpeed = this.normalizeAppSpeed(speed);
    if (appSpeed <= 1.0) {
      return 0.7 + ((appSpeed - 0.5) / 0.5) * 0.3;
    }

    return 1.0 + (appSpeed - 1.0) * 0.2;
  }

  /**
   * Coerce a speed value into the app-level range 0.5-2.0.
   *
   * Numbers and non-blank numeric strings are accepted; anything else (null,
   * undefined, '', booleans, NaN, objects) falls back to 1.0 instead of being
   * coerced to 0 or 1 and clamped.
   *
   * @param {*} speed - Candidate speed value
   * @returns {number} - Speed clamped to [0.5, 2.0]
   */
  normalizeAppSpeed(speed) {
    const isNumericInput = typeof speed === 'number' ||
      (typeof speed === 'string' && speed.trim() !== '');
    const parsed = isNumericInput ? Number(speed) : NaN;
    const value = Number.isFinite(parsed) ? parsed : 1.0;
    return Math.max(0.5, Math.min(2.0, value));
  }
}
// Single shared instance, created when this module is first evaluated and
// exported under the name `elevenLabsTTSModule`.
export const elevenLabsTTSModule = new ElevenLabsTTSModule();