ai.interactive.fiction/public/js/openai-tts-module.js

/**
 * OpenAITTSModule
 * Provides TTS via OpenAI API
 */
import { ApiTTSModuleBase } from './api-tts-module-base.js';

/**
 * TTS provider that talks to an OpenAI-compatible speech endpoint
 * (`${apiBaseUrl}/audio/speech`).
 *
 * Relies on the base class for `getModule`, `reportProgress`,
 * `preprocessText`, `apiKey`, `apiBaseUrl`, `id` and `isReady`.
 */
export class OpenAITTSModule extends ApiTTSModuleBase {
    constructor() {
        super('openai-tts', 'OpenAI TTS');

        // Voice ids this module accepts; anything else is normalized to 'alloy'.
        this.supportedVoices = [
            { id: 'alloy', name: 'Alloy', language: 'en' },
            { id: 'ash', name: 'Ash', language: 'en' },
            { id: 'coral', name: 'Coral', language: 'en' },
            { id: 'echo', name: 'Echo', language: 'en' },
            { id: 'fable', name: 'Fable', language: 'en' },
            { id: 'nova', name: 'Nova', language: 'en' },
            { id: 'onyx', name: 'Onyx', language: 'en' },
            { id: 'sage', name: 'Sage', language: 'en' },
            { id: 'shimmer', name: 'Shimmer', language: 'en' }
        ];

        // Values this module accepts for `response_format`. Shared by
        // initialize() (restoring the saved preference) and setVoiceOptions().
        this.supportedResponseFormats = ['mp3', 'opus', 'aac', 'flac'];

        // Voice options specific to OpenAI
        this.voiceOptions = {
            voice: 'alloy', // Default voice for OpenAI
            model: 'tts-1-hd', // Default model; generateSpeechAudio() falls back to 'tts-1' if this is cleared
            speed: 1.0,
            response_format: 'mp3' // Must be one of supportedResponseFormats
        };

        // OpenAI has a documented fixed voice set for this speech endpoint.
        this.voices = [...this.supportedVoices];
    }

    /**
     * Get the default API base URL for OpenAI
     * @returns {string} - Default API base URL
     */
    getDefaultApiBaseUrl() {
        return 'https://api.openai.com/v1';
    }

    /**
     * Initialize the module: run the parent initialization, restore the
     * persisted voice/model/format/speed preferences and validate the API
     * credentials.
     *
     * A missing API key or an unreachable API is not treated as a failure:
     * the method still resolves `true` but leaves `isReady` set to `false`.
     *
     * @returns {Promise<boolean>} - `false` only when the parent
     *   initialization fails, the persistence manager is missing, or an
     *   unexpected error is thrown
     */
    async initialize() {
        try {
            this.reportProgress(10, 'Initializing OpenAI TTS');

            // Initialize parent
            const parentInit = await super.initialize();
            if (!parentInit) {
                console.error('OpenAI TTS: Parent initialization failed');
                return false;
            }

            // Get required dependencies
            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error('OpenAI TTS: Required dependency persistence-manager not found');
                return false;
            }

            // API key is already loaded in parent initialize() method
            // Just check if it's available
            if (!this.apiKey) {
                console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
                this.isReady = false;
                this.reportProgress(100, 'OpenAI TTS not configured');
                return true;
            }

            // Load preferences
            const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
            if (preferredVoice) {
                this.voiceOptions.voice = this.normalizeVoiceId(preferredVoice);
            }

            const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
            if (preferredModel) {
                this.voiceOptions.model = preferredModel;
            }

            // setVoiceOptions() persists the format under `${id}_format`;
            // restore it here so the choice survives a reload. Unknown values
            // are ignored and the current default is kept.
            const preferredFormat = persistenceManager.getPreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
            if (this.supportedResponseFormats.includes(preferredFormat)) {
                this.voiceOptions.response_format = preferredFormat;
            }

            const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
            if (typeof preferredSpeed === 'number') {
                this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
            }

            const apiReachable = await this.loadVoices();
            if (!apiReachable) {
                this.isReady = false;
                this.reportProgress(100, 'OpenAI TTS not ready');
                return true;
            }

            this.isReady = true;
            this.reportProgress(100, 'OpenAI TTS initialized');
            return true;
        } catch (error) {
            console.error('OpenAI TTS: Initialization error:', error);
            this.isReady = false;
            return false;
        }
    }

    /**
     * Refresh the voice list and, when an API key is set, validate the
     * credentials/endpoint with a GET on `${apiBaseUrl}/models`.
     * @returns {Promise<boolean>} - `true` when no key is set or the check
     *   succeeds; `false` on a non-OK response or a network error
     */
    async loadVoices() {
        // OpenAI exposes a documented fixed TTS voice set, not a voice-list
        // endpoint. Use /models as a lightweight credential/endpoint check.
        this.voices = this.getAvailableVoices();
        if (!this.apiKey) {
            return true;
        }

        try {
            const response = await fetch(`${this.apiBaseUrl}/models`, {
                method: 'GET',
                headers: {
                    'Authorization': `Bearer ${this.apiKey}`
                }
            });

            if (!response.ok) {
                console.error(`OpenAI TTS: API validation failed ${response.status} ${response.statusText}`);
                return false;
            }

            return true;
        } catch (error) {
            console.error('OpenAI TTS: API validation error:', error);
            return false;
        }
    }

    /**
     * Select a voice for the given locale.
     *
     * The voice list carries no per-locale distinction (every entry is
     * tagged 'en'), so the locale is currently ignored and the default voice
     * is selected. The argument is therefore not inspected, which also means
     * a missing or non-string locale no longer throws.
     *
     * NOTE(review): this overwrites whatever voice is currently selected,
     * including one restored from preferences - confirm callers expect that.
     *
     * @param {string} locale - Locale code (unused)
     * @returns {boolean} - Success status
     */
    selectVoiceForLocale(locale) {
        return this.selectDefaultVoice();
    }

    /**
     * Select a default voice
     * @returns {boolean} - Success status
     */
    selectDefaultVoice() {
        this.voiceOptions.voice = 'alloy';  // Default voice
        return true;
    }

    /**
     * Get available voices. Also resets `this.voices` to a fresh copy of
     * the supported voice list.
     * @returns {Array} - Array of voice objects
     */
    getAvailableVoices() {
        this.voices = [...this.supportedVoices];
        return this.voices;
    }

    /**
     * Generate speech audio data using OpenAI API
     * @param {string} text - Text to generate speech for
     * @returns {Promise<Object>} - `{ success: true, audioData: ArrayBuffer }`
     *   on success; `{ success: false, reason: 'not_ready' }` when the module
     *   is not ready or has no key; `{ success: false, reason: 'api_error',
     *   error }` when the request fails. Never rejects.
     */
    async generateSpeechAudio(text) {
        if (!this.isReady || !this.apiKey) {
            return { success: false, reason: 'not_ready' };
        }

        try {
            // Process the text
            const processedText = this.preprocessText(text);

            // Voice and speed are re-normalized here so that values written
            // directly onto voiceOptions are still clamped/validated.
            const payload = {
                model: this.voiceOptions.model || 'tts-1',
                input: processedText,
                voice: this.normalizeVoiceId(this.voiceOptions.voice),
                response_format: this.voiceOptions.response_format || 'mp3',
                speed: this.getApiSpeed(this.voiceOptions.speed)
            };

            // Make API request
            const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${this.apiKey}`
                },
                body: JSON.stringify(payload)
            });

            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
            }

            // Read the body straight into an ArrayBuffer (the shape other
            // modules use); no intermediate Blob is needed.
            const arrayBuffer = await response.arrayBuffer();

            return {
                success: true,
                audioData: arrayBuffer
            };
        } catch (error) {
            console.error('OpenAI TTS: Error generating speech:', error);
            return {
                success: false,
                reason: 'api_error',
                error: error.message
            };
        }
    }

    /**
     * Set voice options. Voice, model and response format are also written
     * to the persistence manager (when available); speed is only applied.
     * @param {Object} options - Voice options
     * @param {string|Object} [options.voice] - Voice id or voice object
     * @param {number} [options.speed] - Playback speed, clamped to 0.25-4.0
     * @param {string} [options.model] - Model name
     * @param {string} [options.response_format] - One of
     *   `supportedResponseFormats`; other values are ignored
     */
    setVoiceOptions(options = {}) {
        // The default parameter only covers `undefined`; tolerate `null` too.
        const requested = options || {};

        // Best-effort persistence: silently skipped when the manager is absent.
        const savePreference = (key, value) => {
            const persistenceManager = this.getModule('persistence-manager');
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', key, value);
            }
        };

        // Handle common options
        if (requested.voice) {
            this.voiceOptions.voice = this.normalizeVoiceId(requested.voice);
            savePreference(`${this.id}_voice`, this.voiceOptions.voice);
        }

        if (typeof requested.speed === 'number') {
            // OpenAI speech speed uses 1.0 as normal. The app-wide slider also
            // uses 1.0 as normal, so only clamp at the provider API boundary.
            this.voiceOptions.speed = this.getApiSpeed(requested.speed);
        }

        // Handle OpenAI-specific options
        if (requested.model) {
            this.voiceOptions.model = requested.model;
            savePreference(`${this.id}_model`, requested.model);
        }

        if (requested.response_format && this.supportedResponseFormats.includes(requested.response_format)) {
            this.voiceOptions.response_format = requested.response_format;
            savePreference(`${this.id}_format`, requested.response_format);
        }
    }

    /**
     * Extract a voice identifier from a string or a voice object.
     * @param {string|Object} voice - Voice id, or object with `id`/`name`
     * @returns {*} - The string itself, `voice.id`, `voice.name`, or `''`
     */
    getVoiceId(voice) {
        if (!voice) return '';
        if (typeof voice === 'string') return voice;
        return voice.id || voice.name || '';
    }

    /**
     * Map an arbitrary voice value onto a supported voice id.
     * @param {string|Object} voice - Voice id or voice object
     * @returns {string} - Lower-cased supported id, or 'alloy' as fallback
     */
    normalizeVoiceId(voice) {
        // String() guards against voice objects whose id/name is not a string.
        const voiceId = String(this.getVoiceId(voice)).trim().toLowerCase();

        if (this.supportedVoices.some(item => item.id === voiceId)) {
            return voiceId;
        }

        if (voiceId) {
            console.warn(`OpenAI TTS: Unsupported voice "${voiceId}", falling back to alloy`);
        }

        return 'alloy';
    }

    /**
     * Clamp a speed value to the 0.25-4.0 range sent to the API.
     * @param {number} speed - Requested speed
     * @returns {number} - Clamped speed; 1.0 when `speed` is not a finite number
     */
    getApiSpeed(speed) {
        const value = Number.isFinite(speed) ? speed : 1.0;
        return Math.max(0.25, Math.min(4.0, value));
    }
}

// Shared module-level instance, exported under the name `openAITTSModule`.
export const openAITTSModule = new OpenAITTSModule();