Fix Kokoro TTS integration issues: Remove API key requirement and ensure system-specific options display correctly

2025-04-05 22:06:22 +00:00
parent e5a3016846
commit fc693ae695
11 changed files with 3296 additions and 596 deletions
@@ -0,0 +1,255 @@
+/**
+ * OpenAITTSModule
+ * Provides TTS via OpenAI API
+ */
+import { ApiTTSModuleBase } from './api-tts-module-base.js';
+
+export class OpenAITTSModule extends ApiTTSModuleBase {
+    constructor() {
+        super('openai', 'OpenAI TTS');
+        
+        // Voice options specific to OpenAI
+        this.voiceOptions = {
+            voice: 'alloy', // Default voice for OpenAI
+            model: 'tts-1', // Standard model
+            speed: 1.0,
+            response_format: 'mp3' // OpenAI supports mp3, opus, aac, and flac (not wav)
+        };
+        
+        // Predefined voices - OpenAI has a fixed set
+        this.voices = [
+            { id: 'alloy', name: 'Alloy', language: 'en' },
+            { id: 'echo', name: 'Echo', language: 'en' },
+            { id: 'fable', name: 'Fable', language: 'en' },
+            { id: 'onyx', name: 'Onyx', language: 'en' },
+            { id: 'nova', name: 'Nova', language: 'en' },
+            { id: 'shimmer', name: 'Shimmer', language: 'en' }
+        ];
+    }
+    
+    /**
+     * Get the default API base URL for OpenAI
+     * @returns {string} - Default API base URL
+     */
+    getDefaultApiBaseUrl() {
+        return 'https://api.openai.com/v1';
+    }
+    
+    /**
+     * Initialize the module
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async initialize() {
+        try {
+            this.reportProgress(10, 'Initializing OpenAI TTS');
+            
+            // Initialize parent
+            const parentInit = await super.initialize();
+            if (!parentInit) {
+                console.error('OpenAI TTS: Parent initialization failed');
+                return false;
+            }
+            
+            // Get required dependencies
+            const persistenceManager = this.getModule('persistence-manager');
+            if (!persistenceManager) {
+                console.error('OpenAI TTS: Required dependency persistence-manager not found');
+                return false;
+            }
+            
+            // Check for API key
+            const apiKey = persistenceManager.getPreference('openai', 'api_key', '');
+            if (!apiKey) {
+                console.error('OpenAI TTS: API key not configured');
+                return false;
+            }
+            
+            // Set API key
+            this.apiKey = apiKey;
+            
+            // Load preferences
+            const preferredVoice = persistenceManager.getPreference('openai', 'voice', this.voiceOptions.voice);
+            if (preferredVoice) {
+                this.voiceOptions.voice = preferredVoice;
+            }
+            
+            const preferredModel = persistenceManager.getPreference('openai', 'model', this.voiceOptions.model);
+            if (preferredModel) {
+                this.voiceOptions.model = preferredModel;
+            }
+            
+            const preferredSpeed = persistenceManager.getPreference('openai', 'speed', this.voiceOptions.speed);
+            if (typeof preferredSpeed === 'number') {
+                this.voiceOptions.speed = preferredSpeed;
+            }
+            
+            // Setup available voices
+            this.voices = this.getAvailableVoices();
+            
+            this.isReady = true;
+            this.reportProgress(100, 'OpenAI TTS initialized');
+            return true;
+        } catch (error) {
+            console.error('OpenAI TTS: Initialization error:', error);
+            this.isReady = false;
+            return false;
+        }
+    }
+    
+    /**
+     * Load available voices
+     * @returns {Promise<boolean>} - Resolves with success status
+     */
+    async loadVoices() {
+        // OpenAI has a fixed set of voices, no need to fetch them
+        return true;
+    }
+    
+    /**
+     * Select a voice for the given locale
+     * @param {string} locale - Locale code
+     * @returns {boolean} - Success status
+     */
+    selectVoiceForLocale(locale) {
+        // Extract language code from locale (e.g., 'en-US' -> 'en')
+        const langCode = locale.split('-')[0].toLowerCase();
+        
+        // All OpenAI voices are English-based
+        // For English locales, we could customize the voice selection
+        // For non-English locales, we'll just use the default
+        
+        // In this simple implementation, we'll just use the default voice
+        return this.selectDefaultVoice();
+    }
+    
+    /**
+     * Select a default voice
+     * @returns {boolean} - Success status
+     */
+    selectDefaultVoice() {
+        this.voiceOptions.voice = 'alloy';
+        return true;
+    }
+    
+    /**
+     * Generate speech audio data using OpenAI API
+     * @param {string} text - Text to generate speech for
+     * @returns {Promise<Object>} - Audio data object
+     */
+    async generateSpeechAudio(text) {
+        if (!text || !this.apiKey) {
+            return { 
+                success: false, 
+                reason: 'missing_api_key_or_text' 
+            };
+        }
+        
+        try {
+            // Process the text
+            const processedText = this.preprocessText(text);
+            
+            // Create request payload
+            const payload = {
+                model: this.voiceOptions.model || 'tts-1',
+                input: processedText,
+                voice: this.voiceOptions.voice || 'alloy',
+                response_format: this.voiceOptions.response_format || 'mp3',
+                speed: this.voiceOptions.speed || 1.0
+            };
+            
+            // Make API request
+            const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Authorization': `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify(payload)
+            });
+            
+            if (!response.ok) {
+                const errorText = await response.text();
+                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
+            }
+            
+            // Get audio blob from response
+            const audioBlob = await response.blob();
+            
+            // Convert to array buffer for consistency with other modules
+            const arrayBuffer = await audioBlob.arrayBuffer();
+            
+            return {
+                success: true,
+                audioData: arrayBuffer
+            };
+        } catch (error) {
+            console.error('OpenAI TTS: Error generating speech:', error);
+            return { 
+                success: false, 
+                reason: 'api_error', 
+                error: error.message 
+            };
+        }
+    }
+    
+    /**
+     * Set voice options
+     * @param {Object} options - Voice options
+     */
+    setVoiceOptions(options = {}) {
+        // Handle common options
+        if (options.voice) {
+            this.voiceOptions.voice = options.voice;
+            
+            // Save voice preference
+            const persistenceManager = this.getModule('persistence-manager');
+            if (persistenceManager) {
+                persistenceManager.updatePreference('tts', 'openai_voice', options.voice);
+            }
+        }
+        
+        if (typeof options.speed === 'number') {
+            this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
+        }
+        
+        // Handle OpenAI-specific options
+        if (options.model) {
+            this.voiceOptions.model = options.model;
+            
+            // Save the model preference
+            const persistenceManager = this.getModule('persistence-manager');
+            if (persistenceManager) {
+                persistenceManager.updatePreference('tts', 'openai_model', options.model);
+            }
+        }
+        
+        if (options.response_format) {
+            // Ensure valid format: mp3, opus, aac, or flac
+            const validFormats = ['mp3', 'opus', 'aac', 'flac'];
+            if (validFormats.includes(options.response_format)) {
+                this.voiceOptions.response_format = options.response_format;
+                
+                // Save the format preference
+                const persistenceManager = this.getModule('persistence-manager');
+                if (persistenceManager) {
+                    persistenceManager.updatePreference('tts', 'openai_format', options.response_format);
+                }
+            }
+        }
+    }
+}
+
+// Register the module with the module registry
+// Module registry MUST be accessed via window, not direct import
+if (window.moduleRegistry) {
+    try {
+        // Create instance first, then register it
+        const openAITTSModule = new OpenAITTSModule();
+        window.moduleRegistry.register(openAITTSModule);
+        console.log('OpenAI TTS Module registered successfully');
+    } catch (err) {
+        console.error('Failed to register OpenAI TTS Module:', err);
+    }
+} else {
+    console.error('Module registry not available when attempting to register OpenAI TTS Module');
+}