Update TTS providers and story markup

This commit is contained in:
2026-05-20 22:13:31 +02:00
parent b911c40d89
commit 8258ea2321
36 changed files with 1482 additions and 197 deletions
+106 -18
View File
@@ -8,7 +8,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
constructor() {
super('openai-tts', 'OpenAI TTS');
this.supportedVoices = [
this.supportedModels = [
{ id: 'tts-1', name: 'TTS-1' },
{ id: 'tts-1-hd', name: 'TTS-1 HD' },
{ id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
];
this.legacyVoices = [
{ id: 'alloy', name: 'Alloy', language: 'en' },
{ id: 'ash', name: 'Ash', language: 'en' },
{ id: 'coral', name: 'Coral', language: 'en' },
@@ -19,6 +25,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
{ id: 'sage', name: 'Sage', language: 'en' },
{ id: 'shimmer', name: 'Shimmer', language: 'en' }
];
this.gpt4oMiniVoices = [
{ id: 'alloy', name: 'Alloy', language: 'en' },
{ id: 'ash', name: 'Ash', language: 'en' },
{ id: 'ballad', name: 'Ballad', language: 'en' },
{ id: 'coral', name: 'Coral', language: 'en' },
{ id: 'echo', name: 'Echo', language: 'en' },
{ id: 'fable', name: 'Fable', language: 'en' },
{ id: 'nova', name: 'Nova', language: 'en' },
{ id: 'onyx', name: 'Onyx', language: 'en' },
{ id: 'sage', name: 'Sage', language: 'en' },
{ id: 'shimmer', name: 'Shimmer', language: 'en' },
{ id: 'verse', name: 'Verse', language: 'en' },
{ id: 'marin', name: 'Marin', language: 'en' },
{ id: 'cedar', name: 'Cedar', language: 'en' }
];
this.supportedVoices = [...this.gpt4oMiniVoices];
this.supportsTtsInstructions = true;
// Voice options specific to OpenAI
this.voiceOptions = {
@@ -62,15 +87,6 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
return false;
}
// API key is already loaded in parent initialize() method
// Just check if it's available
if (!this.apiKey) {
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
this.isReady = false;
this.reportProgress(100, 'OpenAI TTS not configured');
return true;
}
// Load preferences
const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
if (preferredVoice) {
@@ -79,12 +95,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
if (preferredModel) {
this.voiceOptions.model = preferredModel;
this.voiceOptions.model = this.normalizeModelId(preferredModel);
}
this.voices = this.getAvailableVoices();
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
if (typeof preferredSpeed === 'number') {
this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
}
// API key is already loaded in parent initialize() method.
// Model and voice preferences still need to be available for the
// options UI even before credentials are configured.
if (!this.apiKey) {
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
this.isReady = false;
this.reportProgress(100, 'OpenAI TTS not configured');
return true;
}
const apiReachable = await this.loadVoices();
@@ -164,9 +193,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
* @returns {Array} - Array of voice objects
*/
getAvailableVoices() {
this.voices = [...this.supportedVoices];
this.voices = this.getVoicesForModel(this.voiceOptions.model);
return this.voices;
}
async getVoices() {
return this.getAvailableVoices();
}
/**
* Generate speech audio data using OpenAI API
@@ -190,6 +223,11 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
response_format: this.voiceOptions.response_format || 'mp3',
speed: this.getApiSpeed(this.voiceOptions.speed)
};
const instructions = this.getRequestInstructions(options);
if (instructions) {
payload.instructions = instructions;
}
// Make API request
const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
@@ -246,17 +284,20 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
if (typeof options.speed === 'number') {
// OpenAI speech speed uses 1.0 as normal. The app-wide slider also
// uses 1.0 as normal, so only clamp at the provider API boundary.
this.voiceOptions.speed = this.getApiSpeed(options.speed);
this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
}
// Handle OpenAI-specific options
if (options.model) {
this.voiceOptions.model = options.model;
this.voiceOptions.model = this.normalizeModelId(options.model);
this.voices = this.getAvailableVoices();
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
// Save the model preference
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', `${this.id}_model`, options.model);
persistenceManager.updatePreference('tts', `${this.id}_model`, this.voiceOptions.model);
persistenceManager.updatePreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
}
}
@@ -283,7 +324,7 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
normalizeVoiceId(voice) {
const voiceId = this.getVoiceId(voice).toLowerCase();
const supported = new Set(this.supportedVoices.map(item => item.id));
const supported = new Set(this.getVoicesForModel(this.voiceOptions.model).map(item => item.id));
if (supported.has(voiceId)) {
return voiceId;
@@ -296,10 +337,57 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
return 'alloy';
}
normalizeModelId(model) {
const modelId = String(model || '').trim();
const supported = new Set(this.supportedModels.map(item => item.id));
if (supported.has(modelId)) {
return modelId;
}
if (modelId) {
console.warn(`OpenAI TTS: Unsupported model "${modelId}", falling back to tts-1-hd`);
}
return 'tts-1-hd';
}
getVoicesForModel(model) {
const modelId = this.normalizeModelId(model || this.voiceOptions.model);
if (modelId === 'gpt-4o-mini-tts') {
return [...this.gpt4oMiniVoices];
}
return [...this.legacyVoices];
}
getRequestInstructions(options = {}) {
if (this.normalizeModelId(this.voiceOptions.model) !== 'gpt-4o-mini-tts') {
return '';
}
const instructions = Array.isArray(options.ttsInstructions)
? options.ttsInstructions
: [];
const matching = instructions
.filter(entry => {
const provider = String(entry?.provider || '').trim();
return !provider || provider === this.id;
})
.map(entry => String(entry?.instruction || '').trim())
.filter(Boolean);
return matching.length > 0 ? matching[matching.length - 1] : '';
}
getApiSpeed(speed) {
const value = Number.isFinite(speed) ? speed : 1.0;
const value = Number.isFinite(Number(speed)) ? Number(speed) : this.normalizeAppSpeed(speed);
return Math.max(0.25, Math.min(4.0, value));
}
normalizeAppSpeed(speed) {
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1.0;
return Math.max(0.5, Math.min(2.0, value));
}
}
// Module-level instance of the OpenAI TTS provider, created when this file is evaluated.
const openAITTSModule = new OpenAITTSModule();