Added support for OpenAI API TTS.

2025-04-05 14:40:56 +00:00
parent b8e2e6e238
commit e8eb93ae1b
11 changed files with 2063 additions and 989 deletions
@@ -0,0 +1,235 @@
+/**
+ * OpenAI TTS Handler
+ * Provides TTS via OpenAI API
+ */
+import { ApiTTSHandlerBase } from './api-tts-handler-base.js';
+
+export class OpenAITTSHandler extends ApiTTSHandlerBase {
+    constructor() {
+        super('openai', 'OpenAI TTS');
+        
+        // Voice options specific to OpenAI
+        this.voiceOptions = {
+            voice: 'alloy', // Default voice for OpenAI
+            model: 'tts-1', // Standard model
+            speed: 1.0,
+            response_format: 'mp3' // OpenAI supports mp3, opus, aac, and flac (not wav)
+        };
+        
+        // Predefined voices
+        this.voices = [
+            { id: 'alloy', name: 'Alloy', language: 'en' },
+            { id: 'echo', name: 'Echo', language: 'en' },
+            { id: 'fable', name: 'Fable', language: 'en' },
+            { id: 'onyx', name: 'Onyx', language: 'en' },
+            { id: 'nova', name: 'Nova', language: 'en' },
+            { id: 'shimmer', name: 'Shimmer', language: 'en' }
+        ];
+        
+        // Bind methods
+        this.bindMethods([
+            'initialize',
+            'speak',
+            'speakPreloaded',
+            'preloadSpeech',
+            'stop',
+            'isAvailable',
+            'getId',
+            'getVoices',
+            'setVoiceOptions',
+            'getModule',
+            'setupVoiceFromPreferences',
+            'loadVoices',
+            'selectVoiceForLocale',
+            'selectDefaultVoice',
+            'generateSpeechAudio',
+            'getDefaultApiBaseUrl'
+        ]);
+    }
+    
+    /**
+     * Initialize the OpenAI TTS handler
+     * @param {Function} progressCallback - Callback for progress updates
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async initialize(progressCallback = null) {
+        try {
+            // Call parent initialize method
+            const initSuccess = await super.initialize(progressCallback);
+            
+            if (!initSuccess) {
+                return false;
+            }
+            
+            // Load voice preferences
+            const persistenceManager = this.getModule('persistence-manager');
+            if (persistenceManager) {
+                // Load model preference
+                const model = persistenceManager.getPreference('tts', 'openai_model', 'tts-1');
+                if (model) {
+                    this.voiceOptions.model = model;
+                }
+                
+                // Load format preference
+                const format = persistenceManager.getPreference('tts', 'openai_format', 'mp3');
+                if (format) {
+                    this.voiceOptions.response_format = format;
+                }
+            }
+            
+            // OpenAI TTS should be considered available if the API key is set
+            // This will be checked by the parent class already
+            
+            return true;
+        } catch (error) {
+            console.error('OpenAI TTS: Initialization error:', error);
+            if (progressCallback) {
+                progressCallback(100, `OpenAI TTS initialization failed - ${error.message}`);
+            }
+            return false;
+        }
+    }
+    
+    /**
+     * Get the default API base URL for OpenAI
+     * @returns {string} - Default API base URL
+     */
+    getDefaultApiBaseUrl() {
+        return 'https://api.openai.com/v1';
+    }
+    
+    /**
+     * Load available voices from OpenAI API
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async loadVoices() {
+        // OpenAI has a fixed set of voices, no need to fetch them
+        return true;
+    }
+    
+    /**
+     * Select a voice for the given locale
+     * @param {string} locale - Locale code
+     * @returns {boolean} - Success status
+     */
+    selectVoiceForLocale(locale) {
+        // Extract language code from locale (e.g., 'en-US' -> 'en')
+        const langCode = locale.split('-')[0].toLowerCase();
+        
+        // All OpenAI voices are English-based, so if the locale is English, we might want to pick a specific voice
+        // Otherwise, just use the default voice
+        if (langCode === 'en') {
+            this.voiceOptions.voice = 'nova'; // A bit more natural-sounding for general use
+            return true;
+        }
+        
+        // For non-English locales, still use a default voice (OpenAI voices can handle multiple languages)
+        return this.selectDefaultVoice();
+    }
+    
+    /**
+     * Select a default voice
+     * @returns {boolean} - Success status
+     */
+    selectDefaultVoice() {
+        this.voiceOptions.voice = 'alloy';
+        return true;
+    }
+    
+    /**
+     * Generate speech audio data using OpenAI API
+     * @param {string} text - Text to generate speech for
+     * @returns {Promise<Object>} - Audio data (Blob)
+     */
+    async generateSpeechAudio(text) {
+        if (!text || !this.apiKey) {
+            return null;
+        }
+        
+        try {
+            // Create request payload
+            const payload = {
+                model: this.voiceOptions.model || 'tts-1',
+                input: text,
+                voice: this.voiceOptions.voice || 'alloy',
+                response_format: this.voiceOptions.response_format || 'mp3',
+                speed: this.voiceOptions.speed || 1.0
+            };
+            
+            // Make API request
+            const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify(payload)
+            });
+            
+            if (!response.ok) {
+                const errorText = await response.text();
+                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
+            }
+            
+            // Get audio blob from response
+            const audioBlob = await response.blob();
+            
+            // Note: OpenAI doesn't support WAV format directly, so we're using the format specified in voiceOptions
+            // The audio element should still be able to play mp3/opus/aac properly
+            return new Blob([audioBlob], { type: `audio/${this.voiceOptions.response_format}` });
+        } catch (error) {
+            console.error('OpenAI TTS: Error generating speech:', error);
+            return null;
+        }
+    }
+    
+    /**
+     * Get available voices
+     * @returns {Promise<Array>} - Resolves with array of voice objects
+     */
+    async getVoices() {
+        if (!this.available) {
+            return [];
+        }
+        
+        // OpenAI has a fixed set of voices
+        return this.voices;
+    }
+    
+    /**
+     * Set voice options
+     * @param {Object} options - Voice options
+     */
+    setVoiceOptions(options = {}) {
+        // Call parent method for common options
+        super.setVoiceOptions(options);
+        
+        // Handle OpenAI-specific options
+        if (options.model) {
+            this.voiceOptions.model = options.model;
+            
+            // Save the model preference
+            const persistenceManager = this.getModule('persistence-manager');
+            if (persistenceManager) {
+                persistenceManager.updatePreference('tts', 'openai_model', options.model);
+            }
+        }
+        
+        if (options.response_format) {
+            // Ensure valid format: mp3, opus, aac, or flac
+            const validFormats = ['mp3', 'opus', 'aac', 'flac'];
+            if (validFormats.includes(options.response_format)) {
+                this.voiceOptions.response_format = options.response_format;
+                
+                // Save the format preference
+                const persistenceManager = this.getModule('persistence-manager');
+                if (persistenceManager) {
+                    persistenceManager.updatePreference('tts', 'openai_format', options.response_format);
+                }
+            }
+        }
+    }
+}
+
+// Create the singleton instance
+// NOTE(review): no export of this binding is visible in these lines; only the
+// class itself is exported. Confirm whether the instance is meant to be exported.
+const OpenAITTS = new OpenAITTSHandler();