236 lines
7.9 KiB
JavaScript
236 lines
7.9 KiB
JavaScript
/**
|
|
* OpenAI TTS Handler
|
|
* Provides TTS via OpenAI API
|
|
*/
|
|
import { ApiTTSHandlerBase } from './api-tts-handler-base.js';
|
|
|
|
export class OpenAITTSHandler extends ApiTTSHandlerBase {
  /**
   * Create the handler with OpenAI-specific request defaults, the built-in
   * voice list, and the method bindings requested from the base class.
   */
  constructor() {
    super('openai', 'OpenAI TTS');

    // Options sent with every speech request. `voice` is changed by the
    // select* methods; `model` and `response_format` by stored preferences
    // and by setVoiceOptions().
    this.voiceOptions = {
      voice: 'alloy', // Default voice for OpenAI
      model: 'tts-1', // Standard model
      speed: 1.0,
      response_format: 'mp3', // Accepted here: mp3, opus, aac, flac (not wav)
    };

    // Predefined voices (never fetched from the API; see loadVoices()).
    this.voices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' },
    ];

    // Bind methods
    this.bindMethods([
      'initialize',
      'speak',
      'speakPreloaded',
      'preloadSpeech',
      'stop',
      'isAvailable',
      'getId',
      'getVoices',
      'setVoiceOptions',
      'getModule',
      'setupVoiceFromPreferences',
      'loadVoices',
      'selectVoiceForLocale',
      'selectDefaultVoice',
      'generateSpeechAudio',
      'getDefaultApiBaseUrl',
    ]);
  }

  /**
   * Check whether a response format is one this handler is willing to request.
   * Shared by initialize() and setVoiceOptions() so both apply the same rule.
   * @param {*} format - Candidate format value
   * @returns {boolean} - True for 'mp3', 'opus', 'aac' or 'flac'
   */
  isSupportedResponseFormat(format) {
    return ['mp3', 'opus', 'aac', 'flac'].includes(format);
  }

  /**
   * Initialize the OpenAI TTS handler
   *
   * Runs the parent initialization first, then applies the stored model and
   * format preferences when a persistence manager module is available.
   * @param {Function} progressCallback - Callback for progress updates
   * @returns {Promise<boolean>} - Resolves with success status
   */
  async initialize(progressCallback = null) {
    try {
      const initSuccess = await super.initialize(progressCallback);
      if (!initSuccess) {
        return false;
      }

      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        const model = persistenceManager.getPreference('tts', 'openai_model', 'tts-1');
        if (model) {
          this.voiceOptions.model = model;
        }

        // Apply the stored format only if it passes the same whitelist that
        // setVoiceOptions() enforces; otherwise keep the current value so an
        // invalid stored preference cannot make every request fail.
        const format = persistenceManager.getPreference('tts', 'openai_format', 'mp3');
        if (this.isSupportedResponseFormat(format)) {
          this.voiceOptions.response_format = format;
        }
      }

      // Availability (API key present) is left to the parent class.
      return true;
    } catch (error) {
      console.error('OpenAI TTS: Initialization error:', error);
      if (progressCallback) {
        progressCallback(100, `OpenAI TTS initialization failed - ${error.message}`);
      }
      return false;
    }
  }

  /**
   * Get the default API base URL for OpenAI
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.openai.com/v1';
  }

  /**
   * Load available voices
   *
   * OpenAI has a fixed set of voices (this.voices), so nothing is fetched.
   * @returns {Promise<boolean>} - Always resolves with true
   */
  async loadVoices() {
    return true;
  }

  /**
   * Select a voice for the given locale
   *
   * English locales get 'nova'; everything else (including a missing or
   * non-string locale) falls back to selectDefaultVoice().
   * @param {string} locale - Locale code, e.g. 'en-US' or 'en_US'
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    // Extract the language subtag ('en-US' / 'en_US' -> 'en'). Both separators
    // are accepted, and a non-string locale yields '' instead of throwing.
    const langCode =
      typeof locale === 'string' ? locale.trim().split(/[-_]/)[0].toLowerCase() : '';

    if (langCode === 'en') {
      this.voiceOptions.voice = 'nova'; // A bit more natural-sounding for general use
      return true;
    }

    // For non-English locales, still use a default voice
    return this.selectDefaultVoice();
  }

  /**
   * Select a default voice
   * @returns {boolean} - Success status
   */
  selectDefaultVoice() {
    this.voiceOptions.voice = 'alloy';
    return true;
  }

  /**
   * Generate speech audio data using OpenAI API
   *
   * POSTs to `${apiBaseUrl}/audio/speech` with a bearer token. Failures
   * (network errors or non-2xx responses) are logged and reported as null.
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Blob|null>} - Audio blob, or null when text/API key is
   *   missing or the request fails
   */
  async generateSpeechAudio(text) {
    if (!text || !this.apiKey) {
      return null;
    }

    try {
      // Snapshot the options now; the same snapshot labels the result below.
      const payload = {
        model: this.voiceOptions.model || 'tts-1',
        input: text,
        voice: this.voiceOptions.voice || 'alloy',
        response_format: this.voiceOptions.response_format || 'mp3',
        speed: this.voiceOptions.speed || 1.0,
      };

      const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify(payload),
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      const audioBlob = await response.blob();

      // Label the blob with the format that was actually requested rather than
      // re-reading voiceOptions, which may be unset or may have changed while
      // the request was in flight.
      return new Blob([audioBlob], { type: `audio/${payload.response_format}` });
    } catch (error) {
      console.error('OpenAI TTS: Error generating speech:', error);
      return null;
    }
  }

  /**
   * Get available voices
   * @returns {Promise<Array>} - Resolves with the fixed voice list, or an
   *   empty array while the handler is not marked available
   */
  async getVoices() {
    if (!this.available) {
      return [];
    }

    return this.voices;
  }

  /**
   * Set voice options
   *
   * Common options are delegated to the parent class; `model` and
   * `response_format` are handled here and saved as preferences when a
   * persistence manager module is available. Unsupported formats are ignored.
   * @param {Object} options - Voice options
   */
  setVoiceOptions(options = {}) {
    super.setVoiceOptions(options);

    // Looks the persistence manager up on each save, as before, and stays
    // silent when it is absent.
    const savePreference = (key, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', key, value);
      }
    };

    if (options.model) {
      this.voiceOptions.model = options.model;
      savePreference('openai_model', options.model);
    }

    if (options.response_format && this.isSupportedResponseFormat(options.response_format)) {
      this.voiceOptions.response_format = options.response_format;
      savePreference('openai_format', options.response_format);
    }
  }
}
|
|
|
|
// Create the singleton instance
|
|
// Module-level OpenAITTSHandler instance, constructed once when this module is evaluated.
const OpenAITTS = new OpenAITTSHandler();
|