/**
 * OpenAITTSModule
 * Provides TTS via OpenAI API
 */
import { ApiTTSModuleBase } from './api-tts-module-base.js';
|
|
|
|
/**
 * Text-to-speech provider that synthesizes audio through the OpenAI
 * `/audio/speech` HTTP endpoint.
 *
 * The voice catalogue is a fixed list held on the instance; the `/models`
 * endpoint is only contacted to verify that the configured API key and base
 * URL are usable.
 */
export class OpenAITTSModule extends ApiTTSModuleBase {
  /**
   * Build the module with its fixed voice list and default synthesis options.
   */
  constructor() {
    super('openai-tts', 'OpenAI TTS');

    // Voice ids this module will send to the API. Anything else is replaced
    // by 'alloy' in normalizeVoiceId().
    this.supportedVoices = [
      { id: 'alloy', name: 'Alloy', language: 'en' },
      { id: 'ash', name: 'Ash', language: 'en' },
      { id: 'coral', name: 'Coral', language: 'en' },
      { id: 'echo', name: 'Echo', language: 'en' },
      { id: 'fable', name: 'Fable', language: 'en' },
      { id: 'nova', name: 'Nova', language: 'en' },
      { id: 'onyx', name: 'Onyx', language: 'en' },
      { id: 'sage', name: 'Sage', language: 'en' },
      { id: 'shimmer', name: 'Shimmer', language: 'en' }
    ];

    // Response formats this module accepts, both from setVoiceOptions() and
    // from a stored preference in initialize().
    this.supportedResponseFormats = ['mp3', 'opus', 'aac', 'flac'];

    // Voice options specific to OpenAI
    this.voiceOptions = {
      voice: 'alloy', // Default voice for OpenAI
      model: 'tts-1-hd', // Default model; generateSpeechAudio() falls back to 'tts-1' if this is unset
      speed: 1.0,
      response_format: 'mp3' // Must be one of supportedResponseFormats
    };

    // The voice set is fixed, so it can be published before initialize() runs.
    this.voices = [...this.supportedVoices];
  }

  /**
   * Get the default API base URL for OpenAI
   * @returns {string} - Default API base URL
   */
  getDefaultApiBaseUrl() {
    return 'https://api.openai.com/v1';
  }

  /**
   * Initialize the module: run the parent initialization, restore stored
   * preferences (voice, model, response format, speed) and validate the API
   * credentials.
   *
   * A missing API key or a failed credential check is not treated as an
   * initialization failure: the method still resolves `true` but leaves
   * `isReady` set to `false`.
   *
   * @returns {Promise<boolean>} - `false` when the parent initialization
   *   fails, the persistence manager is missing, or an exception is thrown;
   *   `true` otherwise
   */
  async initialize() {
    try {
      this.reportProgress(10, 'Initializing OpenAI TTS');

      // Initialize parent
      const parentInit = await super.initialize();
      if (!parentInit) {
        console.error('OpenAI TTS: Parent initialization failed');
        return false;
      }

      // Get required dependencies
      const persistenceManager = this.getModule('persistence-manager');
      if (!persistenceManager) {
        console.error('OpenAI TTS: Required dependency persistence-manager not found');
        return false;
      }

      // The API key is expected to have been loaded by the parent
      // initialize(); here we only check that it is available.
      if (!this.apiKey) {
        console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
        this.isReady = false;
        this.reportProgress(100, 'OpenAI TTS not configured');
        return true;
      }

      // Load preferences
      const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
      if (preferredVoice) {
        this.voiceOptions.voice = this.normalizeVoiceId(preferredVoice);
      }

      const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
      if (preferredModel) {
        this.voiceOptions.model = preferredModel;
      }

      // setVoiceOptions() persists the response format, so restore it here.
      // Stored values outside the accepted list are ignored.
      const preferredFormat = persistenceManager.getPreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
      if (this.supportedResponseFormats.includes(preferredFormat)) {
        this.voiceOptions.response_format = preferredFormat;
      }

      // Speed is stored under the shared 'speed' key rather than a
      // provider-specific one.
      const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
      if (typeof preferredSpeed === 'number') {
        this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
      }

      const apiReachable = await this.loadVoices();
      if (!apiReachable) {
        this.isReady = false;
        this.reportProgress(100, 'OpenAI TTS not ready');
        return true;
      }

      this.isReady = true;
      this.reportProgress(100, 'OpenAI TTS initialized');
      return true;
    } catch (error) {
      console.error('OpenAI TTS: Initialization error:', error);
      this.isReady = false;
      return false;
    }
  }

  /**
   * Publish the fixed voice list and, when an API key is configured, verify
   * the credentials and endpoint with a GET request to `/models`.
   *
   * @returns {Promise<boolean>} - `true` when no API key is set or the
   *   validation request succeeds; `false` when the request fails or returns
   *   a non-OK status
   */
  async loadVoices() {
    // There is no voice-list request here: the voices come from the fixed
    // list, and /models serves as a lightweight credential/endpoint check.
    this.voices = this.getAvailableVoices();
    if (!this.apiKey) {
      return true;
    }

    try {
      const response = await fetch(`${this.apiBaseUrl}/models`, {
        method: 'GET',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`
        }
      });

      if (!response.ok) {
        console.error(`OpenAI TTS: API validation failed ${response.status} ${response.statusText}`);
        return false;
      }

      return true;
    } catch (error) {
      console.error('OpenAI TTS: API validation error:', error);
      return false;
    }
  }

  /**
   * Select a voice for the given locale.
   *
   * Every supported voice is tagged with language 'en', so the locale does
   * not influence the choice and the default voice is always selected. The
   * argument is not inspected, which also means a missing or malformed
   * locale cannot make this method throw.
   *
   * @param {string} locale - Locale code (currently unused)
   * @returns {boolean} - Success status
   */
  selectVoiceForLocale(locale) {
    return this.selectDefaultVoice();
  }

  /**
   * Select the default voice ('alloy').
   * @returns {boolean} - Always `true`
   */
  selectDefaultVoice() {
    this.voiceOptions.voice = 'alloy'; // Default voice
    return true;
  }

  /**
   * Get available voices. Refreshes `this.voices` with a new copy of the
   * supported voice list and returns that copy.
   * @returns {Array} - Array of voice objects
   */
  getAvailableVoices() {
    this.voices = [...this.supportedVoices];
    return this.voices;
  }

  /**
   * Generate speech audio data using the OpenAI API.
   *
   * @param {string} text - Text to generate speech for
   * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }`
   *   on success; otherwise `{ success: false, reason }` where `reason` is
   *   `'not_ready'`, `'empty_text'`, or `'api_error'` (the latter with an
   *   additional `error` message)
   */
  async generateSpeechAudio(text) {
    if (!this.isReady || !this.apiKey) {
      return { success: false, reason: 'not_ready' };
    }

    try {
      // Process the text
      const processedText = this.preprocessText(text);

      // Nothing to synthesize: skip the network round trip instead of
      // sending an empty input to the API.
      if (!processedText || String(processedText).trim() === '') {
        return { success: false, reason: 'empty_text' };
      }

      // Create request payload. Voice and speed are re-validated here so the
      // request stays valid even if voiceOptions was modified directly.
      const payload = {
        model: this.voiceOptions.model || 'tts-1',
        input: processedText,
        voice: this.normalizeVoiceId(this.voiceOptions.voice),
        response_format: this.voiceOptions.response_format || 'mp3',
        speed: this.getApiSpeed(this.voiceOptions.speed)
      };

      // Make API request
      const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
      }

      // Get audio blob from response
      const audioBlob = await response.blob();

      // Convert to array buffer for consistency with other modules
      const arrayBuffer = await audioBlob.arrayBuffer();

      return {
        success: true,
        audioData: arrayBuffer
      };
    } catch (error) {
      console.error('OpenAI TTS: Error generating speech:', error);
      return {
        success: false,
        reason: 'api_error',
        error: error.message
      };
    }
  }

  /**
   * Set voice options. Recognized keys are `voice`, `speed`, `model` and
   * `response_format`; voice, model and response format are also written to
   * the persistence manager when one is available.
   *
   * @param {Object} options - Voice options; `null` or `undefined` is
   *   treated as an empty object
   */
  setVoiceOptions(options = {}) {
    // The default parameter only covers `undefined`; guard against `null` too.
    const requested = options || {};

    // Store a provider-scoped preference. The persistence manager is looked
    // up lazily so nothing is fetched when there is nothing to save.
    const savePreference = (suffix, value) => {
      const persistenceManager = this.getModule('persistence-manager');
      if (persistenceManager) {
        persistenceManager.updatePreference('tts', `${this.id}_${suffix}`, value);
      }
    };

    // Handle common options
    if (requested.voice) {
      this.voiceOptions.voice = this.normalizeVoiceId(requested.voice);
      savePreference('voice', this.voiceOptions.voice);
    }

    if (typeof requested.speed === 'number') {
      // The incoming speed is only clamped, not rescaled: 1.0 is passed
      // through unchanged. It is not persisted here.
      this.voiceOptions.speed = this.getApiSpeed(requested.speed);
    }

    // Handle OpenAI-specific options
    if (requested.model) {
      this.voiceOptions.model = requested.model;
      savePreference('model', requested.model);
    }

    // Formats outside the accepted list are ignored silently.
    if (requested.response_format && this.supportedResponseFormats.includes(requested.response_format)) {
      this.voiceOptions.response_format = requested.response_format;
      savePreference('format', requested.response_format);
    }
  }

  /**
   * Extract a voice identifier from a string or a voice-like object.
   *
   * @param {string|Object} voice - Voice id, or an object with `id`/`name`
   * @returns {*} - The string itself, the object's `id` or `name`, or `''`
   *   when nothing usable is present
   */
  getVoiceId(voice) {
    if (!voice) return '';
    if (typeof voice === 'string') return voice;
    return voice.id || voice.name || '';
  }

  /**
   * Map an arbitrary voice value to a supported voice id.
   *
   * The extracted id is coerced to a string, trimmed and lower-cased before
   * the lookup, so non-string ids and padded values cannot make this throw.
   * Unsupported non-empty ids are logged with a warning.
   *
   * @param {string|Object} voice - Voice id, or an object with `id`/`name`
   * @returns {string} - A supported voice id; 'alloy' when there is no match
   */
  normalizeVoiceId(voice) {
    const voiceId = String(this.getVoiceId(voice)).trim().toLowerCase();

    if (this.supportedVoices.some(item => item.id === voiceId)) {
      return voiceId;
    }

    if (voiceId) {
      console.warn(`OpenAI TTS: Unsupported voice "${voiceId}", falling back to alloy`);
    }

    return 'alloy';
  }

  /**
   * Convert a speed value into one that is safe to send to the API.
   *
   * @param {number} speed - Requested speed
   * @returns {number} - The speed clamped to [0.25, 4.0]; 1.0 when the input
   *   is not a finite number
   */
  getApiSpeed(speed) {
    const value = Number.isFinite(speed) ? speed : 1.0;
    return Math.max(0.25, Math.min(4.0, value));
  }
}
|
|
|
|
// Shared module-level instance, exported by name for consumers of this file.
export const openAITTSModule = new OpenAITTSModule();
|