// ai.interactive.fiction/public/js/openai-tts-handler.js

/**
 * OpenAI TTS Handler
 * Provides TTS via OpenAI API
 */
import { ApiTTSHandlerBase } from './api-tts-handler-base.js';

export class OpenAITTSHandler extends ApiTTSHandlerBase {
    /**
     * Audio formats this handler accepts for the `response_format` option.
     * Used both when restoring a persisted preference and when options are
     * updated at runtime, so the two paths cannot drift apart.
     * @returns {string[]} - Accepted format identifiers
     */
    static get SUPPORTED_FORMATS() {
        return ['mp3', 'opus', 'aac', 'flac'];
    }

    constructor() {
        super('openai', 'OpenAI TTS');

        // Voice options specific to OpenAI
        this.voiceOptions = {
            voice: 'alloy', // Default voice for OpenAI
            model: 'tts-1', // Standard model
            speed: 1.0,
            response_format: 'mp3' // Default output format; see SUPPORTED_FORMATS for accepted values
        };

        // Predefined voices
        this.voices = [
            { id: 'alloy', name: 'Alloy', language: 'en' },
            { id: 'echo', name: 'Echo', language: 'en' },
            { id: 'fable', name: 'Fable', language: 'en' },
            { id: 'onyx', name: 'Onyx', language: 'en' },
            { id: 'nova', name: 'Nova', language: 'en' },
            { id: 'shimmer', name: 'Shimmer', language: 'en' }
        ];

        // Bind methods
        this.bindMethods([
            'initialize',
            'speak',
            'speakPreloaded',
            'preloadSpeech',
            'stop',
            'isAvailable',
            'getId',
            'getVoices',
            'setVoiceOptions',
            'getModule',
            'setupVoiceFromPreferences',
            'loadVoices',
            'selectVoiceForLocale',
            'selectDefaultVoice',
            'generateSpeechAudio',
            'getDefaultApiBaseUrl'
        ]);
    }

    /**
     * Initialize the OpenAI TTS handler.
     * Runs the parent initialization first, then restores the persisted model
     * and output-format preferences (if a persistence manager is available).
     * @param {Function} progressCallback - Callback for progress updates
     * @returns {Promise<boolean>} - Resolves with success status
     */
    async initialize(progressCallback = null) {
        try {
            // Call parent initialize method
            const initSuccess = await super.initialize(progressCallback);

            if (!initSuccess) {
                return false;
            }

            // Load voice preferences
            const persistenceManager = this.getModule('persistence-manager');
            if (persistenceManager) {
                // Load model preference
                const model = persistenceManager.getPreference('tts', 'openai_model', 'tts-1');
                if (model) {
                    this.voiceOptions.model = model;
                }

                // Load format preference. Apply the same whitelist as
                // setVoiceOptions so a stale or corrupted stored value cannot
                // put the handler into a format it would otherwise reject.
                const format = persistenceManager.getPreference('tts', 'openai_format', 'mp3');
                if (OpenAITTSHandler.SUPPORTED_FORMATS.includes(format)) {
                    this.voiceOptions.response_format = format;
                }
            }

            return true;
        } catch (error) {
            console.error('OpenAI TTS: Initialization error:', error);
            if (progressCallback) {
                progressCallback(100, `OpenAI TTS initialization failed - ${error.message}`);
            }
            return false;
        }
    }

    /**
     * Get the default API base URL for OpenAI
     * @returns {string} - Default API base URL
     */
    getDefaultApiBaseUrl() {
        return 'https://api.openai.com/v1';
    }

    /**
     * Load available voices.
     * The voice list is hard-coded in the constructor, so nothing is fetched.
     * @returns {Promise<boolean>} - Always resolves with true
     */
    async loadVoices() {
        return true;
    }

    /**
     * Select a voice for the given locale.
     * English locales get 'nova'; everything else (including a missing or
     * non-string locale) falls back to the default voice.
     * @param {string} locale - Locale code, e.g. 'en-US' or 'en_US'
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        // A missing/invalid locale must not throw; treat it as "no preference".
        if (typeof locale !== 'string' || locale.trim() === '') {
            return this.selectDefaultVoice();
        }

        // Extract language code from locale (e.g., 'en-US' or 'en_US' -> 'en')
        const langCode = locale.trim().split(/[-_]/)[0].toLowerCase();

        if (langCode === 'en') {
            this.voiceOptions.voice = 'nova';
            return true;
        }

        return this.selectDefaultVoice();
    }

    /**
     * Select a default voice
     * @returns {boolean} - Success status
     */
    selectDefaultVoice() {
        this.voiceOptions.voice = 'alloy';
        return true;
    }

    /**
     * Generate speech audio data using the OpenAI-style `/audio/speech` endpoint.
     * Failures are logged and reported as `null` rather than thrown.
     * @param {string} text - Text to generate speech for
     * @returns {Promise<Blob|null>} - Audio Blob, or null when text/API key is
     *     missing or the request fails
     */
    async generateSpeechAudio(text) {
        if (!text || !this.apiKey) {
            return null;
        }

        try {
            // Resolve the format once so the request payload and the Blob
            // MIME type always agree (previously an unset format produced
            // a Blob typed 'audio/undefined').
            const format = this.voiceOptions.response_format || 'mp3';

            // Create request payload
            const payload = {
                model: this.voiceOptions.model || 'tts-1',
                input: text,
                voice: this.voiceOptions.voice || 'alloy',
                response_format: format,
                speed: this.voiceOptions.speed || 1.0
            };

            // Fall back to the default base URL when none is configured and
            // strip trailing slashes so the path is not joined with '//'.
            const baseUrl = String(this.apiBaseUrl || this.getDefaultApiBaseUrl()).replace(/\/+$/, '');

            // Make API request
            const response = await fetch(`${baseUrl}/audio/speech`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${this.apiKey}`
                },
                body: JSON.stringify(payload)
            });

            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
            }

            // Get audio blob from response
            const audioBlob = await response.blob();

            // Re-wrap so the Blob carries an audio MIME type derived from the
            // requested format.
            return new Blob([audioBlob], { type: `audio/${format}` });
        } catch (error) {
            console.error('OpenAI TTS: Error generating speech:', error);
            return null;
        }
    }

    /**
     * Get available voices
     * @returns {Promise<Array>} - Resolves with the predefined voice list, or
     *     an empty array when the handler is not marked available
     */
    async getVoices() {
        if (!this.available) {
            return [];
        }

        return this.voices;
    }

    /**
     * Set voice options.
     * Common options are delegated to the parent class; `model` and
     * `response_format` are handled here and persisted when a persistence
     * manager is available. Unsupported formats are ignored.
     * @param {Object} options - Voice options
     */
    setVoiceOptions(options = {}) {
        // Call parent method for common options
        super.setVoiceOptions(options);

        const { model, response_format: format } = options;
        if (!model && !format) {
            return;
        }

        const persistenceManager = this.getModule('persistence-manager');

        if (model) {
            this.voiceOptions.model = model;

            // Save the model preference
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', 'openai_model', model);
            }
        }

        if (format && OpenAITTSHandler.SUPPORTED_FORMATS.includes(format)) {
            this.voiceOptions.response_format = format;

            // Save the format preference
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', 'openai_format', format);
            }
        }
    }
}

// Create the singleton instance: one OpenAITTSHandler is constructed when this module is evaluated.
// NOTE(review): no export of this binding is visible in this portion of the file — confirm how consumers obtain it.
const OpenAITTS = new OpenAITTSHandler();