/**
 * OpenAITTSModule
 * Provides TTS via OpenAI API
 */

import { ApiTTSModuleBase } from './api-tts-module-base.js';

// Fallbacks used whenever a stored or requested value is not supported.
const DEFAULT_MODEL_ID = 'tts-1-hd';
const DEFAULT_VOICE_ID = 'alloy';

// The only model for which this module sends `instructions` and offers the
// extended voice list.
const INSTRUCTIONS_MODEL_ID = 'gpt-4o-mini-tts';

// Response formats this module accepts in setVoiceOptions()/initialize().
const RESPONSE_FORMATS = Object.freeze(['mp3', 'opus', 'aac', 'flac']);

/**
 * Convert a value to a finite number.
 * null, undefined and '' count as "missing" rather than 0, so that an absent
 * speed never turns into the slowest possible speed.
 * @param {*} value - Candidate value
 * @param {number} fallback - Returned when value is missing or not finite
 * @returns {number} - Finite number
 */
function toFiniteNumber(value, fallback) {
  if (value == null || value === '') {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Clamp a number into the inclusive range [min, max].
 * @param {number} value - Value to clamp
 * @param {number} min - Lower bound
 * @param {number} max - Upper bound
 * @returns {number} - Clamped value
 */
function clamp(value, min, max) {
  return Math.max(min, Math.min(max, value));
}

export class OpenAITTSModule extends ApiTTSModuleBase {
  constructor() {
    super('openai-tts', 'OpenAI TTS');

    this.supportedModels = [
      { id: 'tts-1', name: 'TTS-1' },
      { id: 'tts-1-hd', name: 'TTS-1 HD' },
      { id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
    ];

    // Voices offered for every model other than gpt-4o-mini-tts.
    this.legacyVoices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'ash', name: 'Ash', language: 'en' },
      { id: 'coral', name: 'Coral', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'sage', name: 'Sage', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' }
    ];

    // Voices offered for gpt-4o-mini-tts (superset of legacyVoices).
    this.gpt4oMiniVoices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'ash', name: 'Ash', language: 'en' },
      { id: 'ballad', name: 'Ballad', language: 'en' },
      { id: 'coral', name: 'Coral', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'sage', name: 'Sage', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' },
      { id: 'verse', name: 'Verse', language: 'en' },
      { id: 'marin', name: 'Marin', language: 'en' },
      { id: 'cedar', name: 'Cedar', language: 'en' }
    ];

    this.supportedVoices = [...this.gpt4oMiniVoices];
    this.supportsTtsInstructions = true;

    // Voice options specific to OpenAI
    this.voiceOptions = {
      voice: DEFAULT_VOICE_ID, // Default voice for OpenAI
      model: DEFAULT_MODEL_ID, // Standard model
      speed: 1.0,
      response_format: 'mp3' // One of RESPONSE_FORMATS
    };

    // OpenAI has a documented fixed voice set for this speech endpoint.
    this.voices = [...this.supportedVoices];
  }

  /**
   * Get the default API base URL for OpenAI
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.openai.com/v1';
  }

  /**
   * Initialize the module: run the parent initialization, restore the saved
   * model, voice, speed and response format, then validate the credentials.
   *
   * Returns true even when the provider is not usable (no API key, or the
   * validation request failed); in that case `isReady` is false. Returns
   * false only when the parent initialization fails, the persistence manager
   * is missing, or an exception is thrown.
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing OpenAI TTS');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('OpenAI TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('OpenAI TTS: Required dependency persistence-manager not found');
        return false;
      }

      // Restore the model BEFORE the voice: voice validation depends on the
      // active model, so validating first against the default model would
      // reset voices that only exist for gpt-4o-mini-tts back to the default.
      const preferredModel = persistenceManager.getPreference(
        'tts',
        `${this.id}_model`,
        this.voiceOptions.model
      );
      if (preferredModel) {
        this.voiceOptions.model = this.normalizeModelId(preferredModel);
      }
      this.voices = this.getAvailableVoices();

      const preferredVoice = persistenceManager.getPreference(
        'tts',
        `${this.id}_voice`,
        this.voiceOptions.voice
      );
      this.voiceOptions.voice = this.normalizeVoiceId(preferredVoice || this.voiceOptions.voice);

      const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
      if (typeof preferredSpeed === 'number') {
        this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
      }

      // setVoiceOptions() persists the response format, so restore it here;
      // anything outside RESPONSE_FORMATS is ignored.
      const preferredFormat = persistenceManager.getPreference(
        'tts',
        `${this.id}_format`,
        this.voiceOptions.response_format
      );
      if (RESPONSE_FORMATS.includes(preferredFormat)) {
        this.voiceOptions.response_format = preferredFormat;
      }

      // API key is already loaded in parent initialize() method.
      // Model and voice preferences still need to be available for the
      // options UI even before credentials are configured.
      if (!this.apiKey) {
        console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
        this.isReady = false;
        this.reportProgress(100, 'OpenAI TTS not configured');
        return true;
      }

      const apiReachable = await this.loadVoices();
      if (!apiReachable) {
        this.isReady = false;
        this.reportProgress(100, 'OpenAI TTS not ready');
        return true;
      }

      this.isReady = true;
      this.reportProgress(100, 'OpenAI TTS initialized');
      return true;
    } catch (error) {
      console.error('OpenAI TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Refresh the voice list for the active model and, when an API key is set,
   * validate the key and endpoint with a GET request to `/models`.
   * @returns {Promise<boolean>} - true when no key is set or the request
   *   succeeded; false when the request failed or returned a non-OK status
   */
  async loadVoices() {
    // OpenAI exposes a documented fixed TTS voice set, not a voice-list
    // endpoint. Use /models as a lightweight credential/endpoint check.
    this.voices = this.getAvailableVoices();

    if (!this.apiKey) {
      return true;
    }

    try {
      const response = await fetch(`${this.apiBaseUrl}/models`, {
        method: 'GET',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`
        }
      });

      if (!response.ok) {
        console.error(`OpenAI TTS: API validation failed ${response.status} ${response.statusText}`);
        return false;
      }

      return true;
    } catch (error) {
      console.error('OpenAI TTS: API validation error:', error);
      return false;
    }
  }

  /**
   * Select a voice for the given locale.
   * Every voice in this module is tagged 'en', so the locale is not used and
   * the default voice is selected.
   * @param {string} locale - Locale code (currently ignored)
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice
   * @returns {boolean} - Success status
   */
  selectDefaultVoice() {
    this.voiceOptions.voice = DEFAULT_VOICE_ID;
    return true;
  }

  /**
   * Get available voices for the active model. Also refreshes `this.voices`.
   * @returns {Array} - Array of voice objects
   */
  getAvailableVoices() {
    this.voices = this.getVoicesForModel(this.voiceOptions.model);
    return this.voices;
  }

  /**
   * Async variant of getAvailableVoices().
   * @returns {Promise<Array>} - Array of voice objects
   */
  async getVoices() {
    return this.getAvailableVoices();
  }

  /**
   * Generate speech audio data using OpenAI API
   * @param {string} text - Text to generate speech for
   * @param {Object} [options] - Request options
   * @param {AbortSignal} [options.signal] - Passed to fetch for cancellation
   * @param {Array} [options.ttsInstructions] - Candidate instruction entries,
   *   see getRequestInstructions()
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }`
   *   on success; `{ success: false, reason: 'not_ready' }` when the module
   *   is not ready or has no API key; `{ success: false, reason: 'api_error',
   *   error }` when the request fails
   */
  async generateSpeechAudio(text, options = {}) {
    if (!this.isReady || !this.apiKey) {
      return { success: false, reason: 'not_ready' };
    }

    try {
      // Process the text
      const processedText = this.preprocessText(text);

      // Create request payload. The model goes through the same
      // normalization that voice validation uses, so the voice sent is
      // always one that was validated against the model sent.
      const payload = {
        model: this.normalizeModelId(this.voiceOptions.model),
        input: processedText,
        voice: this.normalizeVoiceId(this.voiceOptions.voice),
        response_format: this.voiceOptions.response_format || 'mp3',
        speed: this.getApiSpeed(this.voiceOptions.speed)
      };

      const instructions = this.getRequestInstructions(options);
      if (instructions) {
        payload.instructions = instructions;
      }

      // Make API request
      const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(payload),
        signal: options?.signal
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      // Read the body as an array buffer for consistency with other modules
      const arrayBuffer = await response.arrayBuffer();

      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('OpenAI TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options and persist the ones that changed.
   * @param {Object} options - Voice options
   * @param {string|Object} [options.voice] - Voice id, or object with id/name
   * @param {string} [options.model] - Model id
   * @param {number} [options.speed] - App speed, 1.0 is normal
   * @param {string} [options.response_format] - One of mp3, opus, aac, flac
   */
  setVoiceOptions(options = {}) {
    const requested = options ?? {};

    // Look the persistence manager up lazily so that calls which change
    // nothing persistent never touch it.
    const savePreference = (key, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', key, value);
      }
    };

    // Apply the model first: the set of valid voices depends on it, so a
    // voice passed together with a new model must be checked against the
    // new model, not the previous one.
    if (requested.model) {
      this.voiceOptions.model = this.normalizeModelId(requested.model);
      this.voices = this.getAvailableVoices();
      savePreference(`${this.id}_model`, this.voiceOptions.model);
    }

    // A model change can invalidate the current voice, so re-validate and
    // persist the voice whenever either of the two was supplied.
    if (requested.voice || requested.model) {
      this.voiceOptions.voice = this.normalizeVoiceId(requested.voice || this.voiceOptions.voice);
      savePreference(`${this.id}_voice`, this.voiceOptions.voice);
    }

    if (typeof requested.speed === 'number') {
      // OpenAI speech speed uses 1.0 as normal. The app-wide slider also
      // uses 1.0 as normal, so only clamp at the provider API boundary.
      this.voiceOptions.speed = this.normalizeAppSpeed(requested.speed);
    }

    // Unknown formats are ignored and the current format is kept.
    if (requested.response_format && RESPONSE_FORMATS.includes(requested.response_format)) {
      this.voiceOptions.response_format = requested.response_format;
      savePreference(`${this.id}_format`, requested.response_format);
    }
  }

  /**
   * Extract a voice identifier from a string or a voice object.
   * @param {string|Object} voice - Voice id, or object with `id` / `name`
   * @returns {string} - The identifier, or '' when none can be derived
   */
  getVoiceId(voice) {
    if (!voice) return '';
    if (typeof voice === 'string') return voice;
    return voice.id || voice.name || '';
  }

  /**
   * Map a voice to an id supported by the active model.
   * @param {string|Object} voice - Voice id or voice object
   * @returns {string} - Lower-cased supported voice id; 'alloy' when the
   *   voice is missing or not supported by the active model
   */
  normalizeVoiceId(voice) {
    // String() guards against a non-string id/name on a voice object.
    const voiceId = String(this.getVoiceId(voice)).trim().toLowerCase();
    const isSupported = this.getVoicesForModel(this.voiceOptions.model)
      .some(item => item.id === voiceId);

    if (isSupported) {
      return voiceId;
    }

    if (voiceId) {
      console.warn(`OpenAI TTS: Unsupported voice "${voiceId}", falling back to ${DEFAULT_VOICE_ID}`);
    }
    return DEFAULT_VOICE_ID;
  }

  /**
   * Map a model to one of `supportedModels`.
   * @param {string} model - Model id
   * @returns {string} - Supported model id; 'tts-1-hd' when the model is
   *   missing or unsupported
   */
  normalizeModelId(model) {
    const modelId = String(model || '').trim();
    const isSupported = this.supportedModels.some(item => item.id === modelId);

    if (isSupported) {
      return modelId;
    }

    if (modelId) {
      console.warn(`OpenAI TTS: Unsupported model "${modelId}", falling back to ${DEFAULT_MODEL_ID}`);
    }
    return DEFAULT_MODEL_ID;
  }

  /**
   * Get the voices offered for a model.
   * @param {string} [model] - Model id; defaults to the active model
   * @returns {Array} - Fresh copy of the matching voice list
   */
  getVoicesForModel(model) {
    const modelId = this.normalizeModelId(model || this.voiceOptions.model);
    if (modelId === INSTRUCTIONS_MODEL_ID) {
      return [...this.gpt4oMiniVoices];
    }
    return [...this.legacyVoices];
  }

  /**
   * Pick the instruction text to send with a speech request.
   * Only produces a value when the active model is gpt-4o-mini-tts. Entries
   * with an empty provider or a provider equal to this module's id qualify;
   * the last non-empty qualifying instruction wins.
   * @param {Object} [options] - Request options
   * @param {Array} [options.ttsInstructions] - Entries of the shape
   *   `{ provider, instruction }`
   * @returns {string} - Instruction text, or '' when none applies
   */
  getRequestInstructions(options = {}) {
    if (this.normalizeModelId(this.voiceOptions.model) !== INSTRUCTIONS_MODEL_ID) {
      return '';
    }

    const instructions = Array.isArray(options?.ttsInstructions) ? options.ttsInstructions : [];
    const matching = instructions
      .filter(entry => {
        const provider = String(entry?.provider || '').trim();
        return !provider || provider === this.id;
      })
      .map(entry => String(entry?.instruction || '').trim())
      .filter(Boolean);

    return matching.length > 0 ? matching[matching.length - 1] : '';
  }

  /**
   * Convert a speed value to the range sent to the API.
   * @param {*} speed - Speed value, 1.0 is normal
   * @returns {number} - Speed clamped to [0.25, 4.0]; 1.0 when missing or
   *   not a finite number
   */
  getApiSpeed(speed) {
    return clamp(toFiniteNumber(speed, 1.0), 0.25, 4.0);
  }

  /**
   * Convert a speed value to the range stored in `voiceOptions.speed`.
   * @param {*} speed - Speed value, 1.0 is normal
   * @returns {number} - Speed clamped to [0.5, 2.0]; 1.0 when missing or
   *   not a finite number
   */
  normalizeAppSpeed(speed) {
    return clamp(toFiniteNumber(speed, 1.0), 0.5, 2.0);
  }
}

const openAITTSModule = new OpenAITTSModule();

export { openAITTSModule };