Update TTS providers and story markup

This commit is contained in:
2026-05-20 22:13:31 +02:00
parent b911c40d89
commit 8258ea2321
36 changed files with 1482 additions and 197 deletions
+6 -3
View File
@@ -1668,7 +1668,8 @@ body:not([data-game-running="true"]) #start_prompt {
.modal-footer button,
.option-item input[type="text"],
.option-item input[type="password"],
.option-item input[type="url"] {
.option-item input[type="url"],
.option-item input[type="number"] {
background-color: transparent;
border: 1px solid var(--panel-border);
border-radius: var(--control-radius);
@@ -1684,7 +1685,8 @@ body:not([data-game-running="true"]) #start_prompt {
.option-item input[type="text"],
.option-item input[type="password"],
.option-item input[type="url"] {
.option-item input[type="url"],
.option-item input[type="number"] {
box-sizing: border-box;
width: min(18rem, 60%);
padding: 0.3rem 0.5rem;
@@ -1692,7 +1694,8 @@ body:not([data-game-running="true"]) #start_prompt {
.option-item input[type="text"]:focus,
.option-item input[type="password"]:focus,
.option-item input[type="url"]:focus {
.option-item input[type="url"]:focus,
.option-item input[type="number"]:focus {
outline: none;
box-shadow: 0 0 0 2px rgba(90, 57, 33, 0.14);
}
+8 -5
View File
@@ -17,7 +17,7 @@ class AnimationQueueModule extends BaseModule {
// Animation timing properties - use parent's config system
this.updateConfig({
speed: 1.0, // Speed multiplier for delays (1.0 = no scaling, delays are pre-calculated)
speed: 1.0,
fastForwardEnabled: false
});
@@ -44,7 +44,9 @@ class AnimationQueueModule extends BaseModule {
// Listen for speed changes from UI
document.addEventListener('animation:speed:change', (event) => {
if (event.detail && typeof event.detail.speed === 'number') {
// Speed from UI is a rate multiplier (0.5-2.0 typically)
// Word timings are already speed-scaled before they reach
// the scheduler. Keep the value only for diagnostics/API
// compatibility; do not apply it again in schedule().
this.config.speed = event.detail.speed;
console.log(`AnimationQueue: Speed updated to ${this.config.speed}`);
}
@@ -71,8 +73,9 @@ class AnimationQueueModule extends BaseModule {
return -1;
}
// Adjust delay based on fast-forward or speed settings
const actualDelay = this.config.fastForwardEnabled ? 0 : Math.max(0, delay * this.config.speed);
// Delays are absolute timings calculated from the prepared sentence
// duration. TTS/app speed has already been applied at that stage.
const actualDelay = this.config.fastForwardEnabled ? 0 : Math.max(0, delay);
// Record the delay for tracking
this.delay = Math.max(this.delay, delay);
@@ -318,7 +321,7 @@ class AnimationQueueModule extends BaseModule {
/**
* Set the animation speed
* @param {number} speed - Animation speed factor (lower is faster)
* @param {number} speed - Stored speed value for compatibility/diagnostics
*/
setSpeed(speed) {
if (typeof speed !== 'number' || speed <= 0) {
+37 -19
View File
@@ -27,7 +27,7 @@ export class BrowserTTSModule extends TTSHandlerModule {
this.currentUtterance = null;
// Bind additional methods
this.bindMethods(['handleVoicePreferenceChanged']);
this.bindMethods(['handleVoicePreferenceChanged', 'estimateSpeechDuration']);
}
/**
@@ -368,26 +368,29 @@ export class BrowserTTSModule extends TTSHandlerModule {
// Set up event handlers
utterance.onstart = this.utteranceHandlers.start;
utterance.onend = () => {
this.utteranceHandlers.end();
if (callback) {
callback({ success: true });
}
};
utterance.onerror = (event) => {
this.utteranceHandlers.error(event);
if (callback) {
callback({ success: false, reason: 'synthesis_error', error: event });
}
};
utterance.onpause = this.utteranceHandlers.pause;
utterance.onresume = this.utteranceHandlers.resume;
// Start speaking
this.currentUtterance = utterance;
speechSynthesis.speak(utterance);
return true;
return new Promise(resolve => {
utterance.onend = () => {
this.utteranceHandlers.end();
if (callback) {
callback({ success: true });
}
resolve(true);
};
utterance.onerror = (event) => {
this.utteranceHandlers.error(event);
if (callback) {
callback({ success: false, reason: 'synthesis_error', error: event });
}
resolve(false);
};
speechSynthesis.speak(utterance);
});
} catch (error) {
console.error('Browser TTS: Failed to speak:', error);
if (callback) {
@@ -469,7 +472,7 @@ export class BrowserTTSModule extends TTSHandlerModule {
if (typeof options.speed === 'number') {
// Web Speech rate uses 1.0 as normal, matching the app-wide slider.
this.voiceOptions.speed = Math.max(0.1, Math.min(10.0, options.speed));
this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
}
if (typeof options.pitch === 'number') {
@@ -494,8 +497,23 @@ export class BrowserTTSModule extends TTSHandlerModule {
* @returns {Promise<Object>} - Promise that resolves to null
*/
async preloadSpeech(text) {
// Browser TTS can't preload speech
return { success: false, reason: 'not_supported' };
if (!this.isReady || !text) {
return { success: false, reason: 'not_ready_or_empty_text' };
}
return {
success: true,
text,
duration: this.estimateSpeechDuration(text),
directPlayback: true
};
}
estimateSpeechDuration(text) {
const processedText = this.preprocessText(text);
const charactersPerSecond = 12;
const speed = Math.max(0.5, Math.min(2.0, Number(this.voiceOptions.speed) || 1.0));
return Math.max((processedText.length / (charactersPerSecond * speed)) * 1000, 800);
}
/**
+58 -2
View File
@@ -35,6 +35,9 @@ class ChoiceDisplayModule extends BaseModule {
'render',
'clear',
'normalizeChoices',
'orderChoicesForPresentation',
'shuffleChoices',
'randomInt',
'assignLetters',
'selectChoice',
'getTagValue',
@@ -137,7 +140,7 @@ class ChoiceDisplayModule extends BaseModule {
}
normalizeChoices(choices) {
return this.assignLetters(choices.slice(0, 36).map((choice, order) => {
const normalized = choices.slice(0, 36).map((choice, order) => {
const tags = Array.isArray(choice.tags) ? choice.tags : [];
const category = choice.category || this.getTagValue(tags, 'action');
return {
@@ -145,11 +148,64 @@ class ChoiceDisplayModule extends BaseModule {
text: String(choice.text || ''),
tags,
category,
sourceOrder: order,
optional: this.hasTag(tags, 'optional'),
letter: '',
templateCell: this.getTemplateCell({ ...choice, tags, category })
};
}));
});
return this.assignLetters(this.orderChoicesForPresentation(normalized));
}
orderChoicesForPresentation(choices) {
const groupOrder = [];
const grouped = new Map();
const ungrouped = [];
choices.forEach((choice) => {
const group = String(choice.category || '').trim();
if (!group) {
ungrouped.push(choice);
return;
}
if (!grouped.has(group)) {
grouped.set(group, []);
groupOrder.push(group);
}
grouped.get(group).push(choice);
});
const ordered = [];
groupOrder.forEach((group) => {
ordered.push(...this.shuffleChoices(grouped.get(group) || []));
});
if (ungrouped.length > 0) {
ordered.push(...this.shuffleChoices(ungrouped));
}
return ordered;
}
shuffleChoices(choices) {
const shuffled = choices.slice();
for (let index = shuffled.length - 1; index > 0; index -= 1) {
const swapIndex = this.randomInt(index + 1);
[shuffled[index], shuffled[swapIndex]] = [shuffled[swapIndex], shuffled[index]];
}
return shuffled;
}
randomInt(exclusiveMax) {
const max = Math.max(1, Number(exclusiveMax) || 1);
if (window.crypto && typeof window.crypto.getRandomValues === 'function') {
const values = new Uint32Array(1);
window.crypto.getRandomValues(values);
return values[0] % max;
}
return Math.floor(Math.random() * max);
}
assignLetters(choices) {
+13 -3
View File
@@ -75,7 +75,7 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
if (typeof preferredSpeed === 'number') {
this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
}
this.isReady = true;
@@ -255,7 +255,7 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
}
if (typeof options.speed === 'number') {
this.voiceOptions.speed = this.getApiSpeed(options.speed);
this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
}
// Handle ElevenLabs-specific options
@@ -271,7 +271,17 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
}
getApiSpeed(speed) {
return Math.max(0.7, Math.min(1.2, Number.isFinite(speed) ? speed : 1.0));
const appSpeed = this.normalizeAppSpeed(speed);
if (appSpeed <= 1.0) {
return 0.7 + ((appSpeed - 0.5) / 0.5) * 0.3;
}
return 1.0 + (appSpeed - 1.0) * 0.2;
}
normalizeAppSpeed(speed) {
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1.0;
return Math.max(0.5, Math.min(2.0, value));
}
}
+30 -1
View File
@@ -210,9 +210,21 @@ class GameLoopModule extends BaseModule {
return false;
}
await this.resetClientPlaybackAndDisplay();
this.currentChoices = [];
this.currentInputMode = 'none';
document.dispatchEvent(new CustomEvent('story:choices', { detail: [] }));
document.dispatchEvent(new CustomEvent('story:input-mode', { detail: 'none' }));
document.dispatchEvent(new CustomEvent('story:history-restoring', {
detail: { active: true, reason: 'autosave-reconnect-prepare' }
}));
const response = await socketClient.resumeGame(browserSave.inkState);
if (!response?.success) {
console.warn('GameLoop: autosave resume failed', response);
document.dispatchEvent(new CustomEvent('story:history-restoring', {
detail: { active: false, reason: 'autosave-reconnect-failed' }
}));
return false;
}
@@ -222,7 +234,7 @@ class GameLoopModule extends BaseModule {
this.gameState.canSave = this.gameState.started;
this.gameState.canLoad = true;
this.updateUIState();
await this.restoreBrowserSave(browserSave, 'autosave-resume', { resetDisplay: true });
await this.restoreBrowserSave(browserSave, 'autosave-resume', { resetDisplay: false });
this.restoreInputStateFromSave(browserSave, 'autosave-resume');
return true;
}
@@ -281,6 +293,14 @@ class GameLoopModule extends BaseModule {
const storyHistory = this.getModule('story-history');
if (storyHistory && typeof storyHistory.startNewGame === 'function') {
await storyHistory.startNewGame();
if (typeof storyHistory.saveSlot === 'function') {
await storyHistory.saveSlot(this.autoSaveSlot, {
inkState: null,
choices: [],
inputMode: 'none',
running: false
});
}
}
const response = await socketClient.newGame();
if (!response?.success) {
@@ -296,6 +316,15 @@ class GameLoopModule extends BaseModule {
this.gameState.canSave = true;
this.gameState.canLoad = Boolean(response.canLoad);
this.updateUIState();
if (response.savedState && storyHistory && typeof storyHistory.saveSlot === 'function') {
await storyHistory.saveSlot(this.autoSaveSlot, {
inkState: response.savedState,
choices: [],
inputMode: 'none',
running: true
});
this.lastInkState = response.savedState;
}
}
/**
+30 -1
View File
@@ -20,6 +20,7 @@ export class KokoroTTSModule extends TTSHandlerModule {
this.lastProgressTime = null;
this.lastProgressValue = null;
this.modelLoaded = false;
this.unsupportedReason = '';
// Options for playback
this.options = {
@@ -37,7 +38,8 @@ export class KokoroTTSModule extends TTSHandlerModule {
'pause',
'resume',
'getDefaultVoices',
'setVoiceOptions'
'setVoiceOptions',
'supportsGameLanguage'
]);
}
@@ -59,6 +61,18 @@ export class KokoroTTSModule extends TTSHandlerModule {
return false;
}
const gameConfig = this.getModule('game-config');
const gameLanguage = gameConfig?.getLocale?.() || 'en_US';
if (!this.supportsGameLanguage(gameLanguage)) {
this.voices = [];
this.isReady = false;
this.unsupportedReason = `Kokoro TTS supports English and Chinese only; game language is ${gameLanguage}`;
this.reportProgress(100, 'Kokoro TTS disabled for this language');
console.log(`Kokoro TTS: ${this.unsupportedReason}`);
return true;
}
this.unsupportedReason = '';
this.addEventListener(document, 'preference-updated', (event) => {
const { category, key } = event.detail || {};
if (category === 'audio' && ['masterVolume', 'ttsVolume', 'masterVolumeEnabled', 'ttsVolumeEnabled'].includes(key) && this.currentAudio) {
@@ -388,12 +402,27 @@ export class KokoroTTSModule extends TTSHandlerModule {
return Math.max(0, Math.min(1, this.options.volume * (masterEnabled ? masterVolume : 0) * (ttsEnabled ? ttsVolume : 0)));
}
supportsGameLanguage(language) {
const normalized = String(language || '').trim().replace('_', '-').toLowerCase();
const languageCode = normalized.split('-')[0];
return languageCode === 'en'
|| languageCode === 'english'
|| languageCode === 'zh'
|| languageCode === 'chinese'
|| languageCode === 'cmn'
|| languageCode === 'yue';
}
/**
* Get available voices
* @returns {Array} - Array of voice objects
*/
async getVoices() {
if (this.unsupportedReason) {
return [];
}
// If no voices are loaded yet, return default voices
if (!this.voices || this.voices.length === 0) {
return this.getDefaultVoices();
+191 -12
View File
@@ -27,6 +27,12 @@ class LayoutRendererModule extends BaseModule {
'decorateInlineWord',
'applyGlossaryEntries',
'normalizeGlossaryText',
'normalizeGlossaryToken',
'normalizeGlossaryCompact',
'buildGlossaryTermPatterns',
'buildCompactGlossaryTermPatterns',
'decorateGlossarySegment',
'decorateGlossaryRange',
'decorateGlossaryWord',
'ensureGlossaryTooltip',
'showGlossaryTooltip',
@@ -337,34 +343,56 @@ class LayoutRendererModule extends BaseModule {
let cursor = 0;
const segments = [];
let compactCursor = 0;
const compactSegments = [];
const fullText = words.map((word, index) => {
if (index > 0) cursor += 1;
const start = cursor;
cursor += word.text.length;
segments.push({ ...word, start, end: cursor });
const compactText = this.normalizeGlossaryCompact(word.text);
if (compactText) {
const compactStart = compactCursor;
compactCursor += compactText.length;
compactSegments.push({ ...word, start: compactStart, end: compactCursor });
}
return word.text;
}).join(' ');
const compactFullText = words.map(word => this.normalizeGlossaryCompact(word.text)).join('');
entries
.filter(entry => entry && entry.term && entry.definition)
.forEach(entry => {
const normalizedTerm = this.normalizeGlossaryText(entry.term);
if (!normalizedTerm) return;
const matcher = new RegExp(`(^|\\s)(${this.escapeRegExp(normalizedTerm)})(?=\\s|$|[.,;:!?])`, 'giu');
let match;
while ((match = matcher.exec(fullText)) !== null) {
const matchStart = match.index + match[1].length;
const matchEnd = matchStart + match[2].length;
segments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossaryWord(segment.element, entry));
}
this.buildGlossaryTermPatterns(entry.term).forEach((pattern) => {
const matcher = new RegExp(`(^|\\s)(${pattern})(?=\\s|$|[.,;:!?])`, 'giu');
let match;
while ((match = matcher.exec(fullText)) !== null) {
const matchStart = match.index + match[1].length;
const matchEnd = matchStart + match[2].length;
segments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'text'));
}
});
this.buildCompactGlossaryTermPatterns(entry.term).forEach((pattern) => {
const matcher = new RegExp(pattern, 'giu');
let match;
while ((match = matcher.exec(compactFullText)) !== null) {
const matchStart = match.index;
const matchEnd = matchStart + match[0].length;
compactSegments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'compact'));
}
});
});
}
normalizeGlossaryText(text) {
return String(text || '')
.normalize('NFC')
.replace(/\u200c/g, '')
.replace(/\u00ad/g, '')
.replace(/-\s*$/g, '')
@@ -372,6 +400,157 @@ class LayoutRendererModule extends BaseModule {
.trim();
}
normalizeGlossaryToken(text) {
return this.normalizeGlossaryText(text)
.replace(/^[.,;:!?()[\]{}"'„“”‚‘’»«]+|[.,;:!?()[\]{}"'„“”‚‘’»«]+$/g, '');
}
normalizeGlossaryCompact(text) {
return this.normalizeGlossaryToken(text)
.replace(/[-\s]+/g, '')
.replace(/[.,;:!?()[\]{}"'„“”‚‘’»«]+/g, '');
}
buildGlossaryTermPatterns(term) {
const normalizedTerm = this.normalizeGlossaryText(term);
if (!normalizedTerm) return [];
const exact = normalizedTerm
.split(/\s+/)
.map(token => this.escapeRegExp(this.normalizeGlossaryToken(token)))
.filter(Boolean)
.join('\\s+');
if (!exact) return [];
const inflected = normalizedTerm
.split(/\s+/)
.map((token, index, tokens) => {
const normalized = this.normalizeGlossaryToken(token);
if (!normalized) return '';
const escaped = this.escapeRegExp(normalized);
const isLast = index === tokens.length - 1;
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
})
.filter(Boolean)
.join('\\s+');
return [...new Set([exact, inflected])];
}
buildCompactGlossaryTermPatterns(term) {
const tokens = this.normalizeGlossaryText(term)
.split(/\s+/)
.map(token => this.normalizeGlossaryCompact(token))
.filter(Boolean);
if (tokens.length === 0) return [];
const exact = tokens.map(token => this.escapeRegExp(token)).join('');
const inflected = tokens
.map((token, index) => {
const escaped = this.escapeRegExp(token);
const isLast = index === tokens.length - 1;
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
})
.join('');
return [...new Set([exact, inflected])];
}
decorateGlossarySegment(segment, entry, matchStart, matchEnd, mode = 'text') {
if (!segment?.element || !entry?.definition) return;
const localStart = Math.max(0, matchStart - segment.start);
const localEnd = Math.min(segment.end - segment.start, matchEnd - segment.start);
if (localEnd <= localStart) return;
const segmentLength = mode === 'compact'
? this.normalizeGlossaryCompact(segment.text).length
: segment.text.length;
if (localStart <= 0 && localEnd >= segmentLength) {
this.decorateGlossaryWord(segment.element, entry);
return;
}
if (mode === 'compact') {
return;
}
this.decorateGlossaryRange(segment.element, entry, localStart, localEnd);
}
decorateGlossaryRange(word, entry, start, end) {
if (!word || !entry?.definition) return;
const text = word.textContent || '';
const safeStart = Math.max(0, Math.min(text.length, start));
const safeEnd = Math.max(safeStart, Math.min(text.length, end));
if (safeStart === 0 && safeEnd >= text.length) {
this.decorateGlossaryWord(word, entry);
return;
}
if (safeEnd <= safeStart) return;
word.dataset.glossaryPartial = 'true';
const textNodes = [];
const filter = window.NodeFilter || NodeFilter;
const walker = document.createTreeWalker(word, filter.SHOW_TEXT);
let node;
while ((node = walker.nextNode())) {
textNodes.push(node);
}
let offset = 0;
textNodes.forEach((textNode) => {
const nodeText = textNode.nodeValue || '';
const nodeStart = offset;
const nodeEnd = nodeStart + nodeText.length;
offset = nodeEnd;
const overlapStart = Math.max(safeStart, nodeStart);
const overlapEnd = Math.min(safeEnd, nodeEnd);
if (overlapEnd <= overlapStart || !textNode.parentNode) return;
const localStart = overlapStart - nodeStart;
const localEnd = overlapEnd - nodeStart;
const before = nodeText.slice(0, localStart);
const matched = nodeText.slice(localStart, localEnd);
const after = nodeText.slice(localEnd);
const parent = textNode.parentNode;
if (before) {
parent.insertBefore(document.createTextNode(before), textNode);
}
if (matched) {
const gloss = document.createElement('span');
gloss.textContent = matched;
this.decorateGlossaryWord(gloss, entry);
parent.insertBefore(gloss, textNode);
}
if (after) {
parent.insertBefore(document.createTextNode(after), textNode);
}
parent.removeChild(textNode);
});
if (textNodes.length === 0) {
const before = text.slice(0, safeStart);
const matched = text.slice(safeStart, safeEnd);
const after = text.slice(safeEnd);
word.textContent = '';
if (before) word.appendChild(document.createTextNode(before));
const gloss = document.createElement('span');
gloss.textContent = matched;
this.decorateGlossaryWord(gloss, entry);
word.appendChild(gloss);
if (after) word.appendChild(document.createTextNode(after));
}
}
decorateGlossaryWord(word, entry) {
if (!word || !entry?.definition) return;
word.classList.add('story-glossary-word');
+1
View File
@@ -122,6 +122,7 @@ const ModuleLoader = (function() {
{ id: 'browser-tts', script: '/js/browser-tts-module.js', weight: 12 },
{ id: 'elevenlabs-tts', script: '/js/elevenlabs-tts-module.js', weight: 12 },
{ id: 'openai-tts', script: '/js/openai-tts-module.js', weight: 12 },
{ id: 'local-openai-tts', script: '/js/local-openai-tts-module.js', weight: 12 },
{ id: 'tts-factory', script: '/js/tts-factory-module.js', weight: 13 }, // TTSFactory must be loaded before TTSPlayer
// UI and interaction modules
+259
View File
@@ -0,0 +1,259 @@
/**
* LocalOpenAITTSModule
* Provides TTS via local or self-hosted OpenAI-compatible /audio/speech APIs.
*/
import { ApiTTSModuleBase } from './api-tts-module-base.js';
/**
 * TTS provider for local or self-hosted servers that expose an
 * OpenAI-compatible `/audio/speech` endpoint.
 *
 * The provider counts as ready as soon as a base URL, a voice and a model
 * are configured; an API key is optional and only sent when present. No
 * voice list is offered: voice and model are free-text options.
 */
export class LocalOpenAITTSModule extends ApiTTSModuleBase {
    constructor() {
        super('local-openai-tts', 'Local OpenAI TTS');

        // Defaults; voice, model and format are replaced by stored preferences in initialize().
        this.voiceOptions = {
            voice: 'alloy',
            model: 'tts-1',
            speed: 1.0,
            response_format: 'mp3'
        };

        this.voices = [];
    }

    /**
     * @returns {string} Base URL used when no URL has been configured
     */
    getDefaultApiBaseUrl() {
        return 'http://localhost:8000/v1';
    }

    /**
     * Initialize the provider: run the parent initialization, then load the
     * stored voice, model, response format and app-wide speed preferences.
     *
     * @returns {Promise<boolean>} False when the parent initialization fails,
     *   the persistence manager is missing, or an error is thrown; true
     *   otherwise, even when the provider ends up not configured
     */
    async initialize() {
        try {
            this.reportProgress(10, 'Initializing Local OpenAI TTS');

            const parentInit = await super.initialize();
            if (!parentInit) {
                console.error('Local OpenAI TTS: Parent initialization failed');
                return false;
            }

            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error('Local OpenAI TTS: Required dependency persistence-manager not found');
                return false;
            }

            const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
            if (preferredVoice) {
                this.voiceOptions.voice = this.normalizeTextOption(preferredVoice, this.voiceOptions.voice);
            }

            const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
            if (preferredModel) {
                this.voiceOptions.model = this.normalizeTextOption(preferredModel, this.voiceOptions.model);
            }

            const preferredFormat = persistenceManager.getPreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
            if (preferredFormat) {
                this.voiceOptions.response_format = this.normalizeResponseFormat(preferredFormat);
            }

            const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
            if (typeof preferredSpeed === 'number') {
                this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
            }

            this.isReady = this.hasRequiredConfiguration();
            this.reportProgress(100, this.isReady ? 'Local OpenAI TTS initialized' : 'Local OpenAI TTS not configured');
            return true;
        } catch (error) {
            console.error('Local OpenAI TTS: Initialization error:', error);
            this.isReady = false;
            return false;
        }
    }

    /**
     * Reset the voice list to empty; this provider does not load voices.
     *
     * @returns {Promise<boolean>} Always true
     */
    async loadVoices() {
        this.voices = [];
        return true;
    }

    /**
     * Locale-specific voice selection simply keeps the configured voice.
     *
     * @returns {boolean} Always true
     */
    selectVoiceForLocale() {
        return this.selectDefaultVoice();
    }

    /**
     * Make sure a voice name is set, falling back to 'alloy'.
     *
     * @returns {boolean} Always true
     */
    selectDefaultVoice() {
        this.voiceOptions.voice = this.normalizeTextOption(this.voiceOptions.voice, 'alloy');
        return true;
    }

    /**
     * @returns {Array} Always an empty list
     */
    getAvailableVoices() {
        return [];
    }

    /**
     * @returns {Promise<Array>} Always an empty list
     */
    async getVoices() {
        return [];
    }

    /**
     * Request synthesized audio for a text from the `/audio/speech` endpoint.
     *
     * @param {string} text - Text to synthesize; it is run through preprocessText first
     * @param {Object} [options] - Request options
     * @param {AbortSignal} [options.signal] - Signal used to cancel the request
     * @returns {Promise<Object>} `{ success: true, audioData: ArrayBuffer }` on
     *   success, otherwise `{ success: false, reason, error? }` with reason
     *   'not_ready', 'empty_text', 'aborted' or 'api_error'
     */
    async generateSpeechAudio(text, options = {}) {
        if (!this.isReady || !this.apiBaseUrl) {
            return { success: false, reason: 'not_ready' };
        }

        try {
            const processedText = this.preprocessText(text);
            if (!processedText) {
                return { success: false, reason: 'empty_text' };
            }

            const payload = {
                model: this.normalizeTextOption(this.voiceOptions.model, 'tts-1'),
                input: processedText,
                voice: this.normalizeTextOption(this.voiceOptions.voice, 'alloy'),
                response_format: this.normalizeResponseFormat(this.voiceOptions.response_format),
                speed: this.getApiSpeed(this.voiceOptions.speed)
            };

            const headers = {
                'Content-Type': 'application/json'
            };
            // The key is optional here: the header is only sent when one is configured.
            if (this.apiKey) {
                headers.Authorization = `Bearer ${this.apiKey}`;
            }

            const response = await fetch(`${this.apiBaseUrl.replace(/\/+$/, '')}/audio/speech`, {
                method: 'POST',
                headers,
                body: JSON.stringify(payload),
                signal: options.signal
            });

            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
            }

            const audioBlob = await response.blob();
            const arrayBuffer = await audioBlob.arrayBuffer();

            return {
                success: true,
                audioData: arrayBuffer
            };
        } catch (error) {
            if (error?.name === 'AbortError') {
                console.error('Local OpenAI TTS: Speech request was aborted:', error);
                return {
                    success: false,
                    reason: 'aborted',
                    error: error.message
                };
            }

            console.error('Local OpenAI TTS: Error generating speech:', error);
            return {
                success: false,
                reason: 'api_error',
                error: error.message
            };
        }
    }

    /**
     * Update voice options. Voice, model and response format are persisted
     * when a persistence manager is available; speed is only kept in memory.
     * Readiness is recomputed and announced afterwards.
     *
     * @param {Object} [options] - Options to change
     * @param {string} [options.voice] - Voice name
     * @param {number} [options.speed] - App-wide speed, 1.0 being normal
     * @param {string} [options.model] - Model name
     * @param {string} [options.response_format] - Audio format
     */
    setVoiceOptions(options = {}) {
        const persistenceManager = this.getModule('persistence-manager');

        if (typeof options.voice === 'string') {
            this.voiceOptions.voice = this.normalizeTextOption(options.voice, this.voiceOptions.voice);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
            }
        }

        if (typeof options.speed === 'number') {
            this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
        }

        if (typeof options.model === 'string') {
            this.voiceOptions.model = this.normalizeTextOption(options.model, this.voiceOptions.model);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_model`, this.voiceOptions.model);
            }
        }

        if (typeof options.response_format === 'string') {
            this.voiceOptions.response_format = this.normalizeResponseFormat(options.response_format);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
            }
        }

        this.isReady = this.hasRequiredConfiguration();
        this.notifyReadyState();
    }

    /**
     * React to an API key change event addressed to this provider.
     * A value that looks like an http(s) URL is rejected, since it is not a key.
     *
     * @param {CustomEvent} event - Event whose detail carries `provider` and `key`
     */
    handleApiKeyChanged(event) {
        if (!event?.detail || event.detail.provider !== this.id) return;

        const newKey = event.detail.key || '';
        if (newKey && /^https?:\/\//i.test(newKey)) {
            console.error('Local OpenAI TTS: Received URL instead of API key, ignoring it');
            return;
        }

        const oldKey = this.apiKey;
        this.apiKey = newKey;

        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager && oldKey !== newKey) {
            persistenceManager.updatePreference('tts', `${this.id}_api_key`, newKey);
        }

        const wasReady = this.isReady;
        this.isReady = this.hasRequiredConfiguration();
        if (wasReady !== this.isReady) {
            this.notifyReadyState();
        }
    }

    /**
     * React to an API URL change event addressed to this provider.
     *
     * The URL is trimmed and stripped of trailing slashes. If nothing is
     * left (missing, blank or slash-only value) the default base URL is
     * used, so a blank input cannot leave the provider without a URL.
     *
     * @param {CustomEvent} event - Event whose detail carries `provider` and `url`
     */
    handleApiUrlChanged(event) {
        if (!event?.detail || event.detail.provider !== this.id) return;

        const cleanUrl = (value) => String(value || '').trim().replace(/\/+$/, '');

        const oldUrl = this.apiBaseUrl;
        const newUrl = cleanUrl(event.detail.url) || cleanUrl(this.getDefaultApiBaseUrl());
        this.apiBaseUrl = newUrl;

        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager && oldUrl !== newUrl) {
            persistenceManager.updatePreference('tts', `${this.id}_api_url`, newUrl);
        }

        const wasReady = this.isReady;
        this.isReady = this.hasRequiredConfiguration();
        if (wasReady !== this.isReady || oldUrl !== newUrl) {
            this.notifyReadyState();
        }
    }

    /**
     * @returns {boolean} True when a base URL, a voice and a model are all set
     */
    hasRequiredConfiguration() {
        return Boolean(this.apiBaseUrl && this.voiceOptions.voice && this.voiceOptions.model);
    }

    /**
     * Trim a free-text option, substituting a fallback for blank values.
     *
     * @param {*} value - Raw option value
     * @param {string} fallback - Value used when the trimmed text is empty
     * @returns {string} Trimmed text or the fallback
     */
    normalizeTextOption(value, fallback) {
        const text = String(value || '').trim();
        return text || fallback;
    }

    /**
     * @param {*} value - Requested audio format
     * @returns {string} The lower-cased format if it is one of mp3, opus, aac,
     *   flac, wav or pcm; otherwise 'mp3'
     */
    normalizeResponseFormat(value) {
        const format = String(value || '').trim().toLowerCase();
        const validFormats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'];
        return validFormats.includes(format) ? format : 'mp3';
    }

    /**
     * Parse a speed value without treating "nothing" as zero.
     *
     * Number() turns null and '' into 0, which would silently select the
     * slowest speed, so only numbers and non-blank strings are parsed.
     *
     * @param {*} speed - Raw speed value
     * @returns {number|null} Finite number, or null when the input is not numeric
     */
    parseSpeedValue(speed) {
        const isParsable = typeof speed === 'number'
            || (typeof speed === 'string' && speed.trim() !== '');
        const parsed = isParsable ? Number(speed) : Number.NaN;
        return Number.isFinite(parsed) ? parsed : null;
    }

    /**
     * Convert a speed into the value sent in the request payload.
     *
     * @param {number|string} speed - Requested speed, 1.0 being normal
     * @returns {number} Speed clamped to [0.25, 4.0]; 1.0 for non-numeric input
     */
    getApiSpeed(speed) {
        const value = this.parseSpeedValue(speed) ?? this.normalizeAppSpeed(speed);
        return Math.max(0.25, Math.min(4.0, value));
    }

    /**
     * Normalize an app-wide speed value.
     *
     * @param {number|string} speed - Requested speed, 1.0 being normal
     * @returns {number} Speed clamped to [0.5, 2.0]; 1.0 for non-numeric input
     */
    normalizeAppSpeed(speed) {
        const value = this.parseSpeedValue(speed) ?? 1.0;
        return Math.max(0.5, Math.min(2.0, value));
    }
}
// Single shared instance of the provider, exported for ES-module importers.
const localOpenAITTSModule = new LocalOpenAITTSModule();
export { localOpenAITTSModule };
// Register the instance with the global module registry when one exists on window.
if (window.moduleRegistry) {
window.moduleRegistry.register(localOpenAITTSModule);
}
// Note: the global named after the class holds the instance, not the class itself.
window.LocalOpenAITTSModule = localOpenAITTSModule;
+48
View File
@@ -19,6 +19,8 @@ class MarkupParserModule extends BaseModule {
'parseParagraph',
'parseInline',
'extractGlossaryTags',
'extractTtsInstructionTags',
'normalizeTtsInstructionProvider',
'parseImageOptions',
'parseSfxOptions',
'parseMusicOptions',
@@ -243,6 +245,52 @@ class MarkupParserModule extends BaseModule {
.sort((a, b) => b.term.length - a.term.length);
}
extractTtsInstructionTags(tags = []) {
if (!Array.isArray(tags)) return [];
return tags
.map(tag => {
const key = String(tag?.key || '').toLowerCase();
const value = String(tag?.value || '').trim();
const param = String(tag?.param || '').trim();
if (key === 'tts') {
if (param) {
return {
provider: this.normalizeTtsInstructionProvider(value),
instruction: param
};
}
return {
provider: null,
instruction: value
};
}
if (key.startsWith('tts-') && value) {
return {
provider: this.normalizeTtsInstructionProvider(key.slice(4)),
instruction: value
};
}
return null;
})
.filter(entry => entry && entry.instruction);
}
normalizeTtsInstructionProvider(provider) {
const normalized = String(provider || '').trim().toLowerCase();
if (!normalized) return null;
if (normalized === 'openai' || normalized === 'openai-tts') return 'openai-tts';
if (normalized === 'local-openai' || normalized === 'local-openai-tts') return 'local-openai-tts';
if (normalized === 'elevenlabs' || normalized === 'elevenlabs-tts') return 'elevenlabs-tts';
if (normalized === 'kokoro' || normalized === 'kokoro-tts') return 'kokoro-tts';
if (normalized === 'browser' || normalized === 'browser-tts') return 'browser-tts';
return normalized;
}
smartypants(text) {
const result = String(text)
.replace(/---/g, '\u2014')
+106 -18
View File
@@ -8,7 +8,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
constructor() {
super('openai-tts', 'OpenAI TTS');
this.supportedVoices = [
this.supportedModels = [
{ id: 'tts-1', name: 'TTS-1' },
{ id: 'tts-1-hd', name: 'TTS-1 HD' },
{ id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
];
this.legacyVoices = [
{ id: 'alloy', name: 'Alloy', language: 'en' },
{ id: 'ash', name: 'Ash', language: 'en' },
{ id: 'coral', name: 'Coral', language: 'en' },
@@ -19,6 +25,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
{ id: 'sage', name: 'Sage', language: 'en' },
{ id: 'shimmer', name: 'Shimmer', language: 'en' }
];
this.gpt4oMiniVoices = [
{ id: 'alloy', name: 'Alloy', language: 'en' },
{ id: 'ash', name: 'Ash', language: 'en' },
{ id: 'ballad', name: 'Ballad', language: 'en' },
{ id: 'coral', name: 'Coral', language: 'en' },
{ id: 'echo', name: 'Echo', language: 'en' },
{ id: 'fable', name: 'Fable', language: 'en' },
{ id: 'nova', name: 'Nova', language: 'en' },
{ id: 'onyx', name: 'Onyx', language: 'en' },
{ id: 'sage', name: 'Sage', language: 'en' },
{ id: 'shimmer', name: 'Shimmer', language: 'en' },
{ id: 'verse', name: 'Verse', language: 'en' },
{ id: 'marin', name: 'Marin', language: 'en' },
{ id: 'cedar', name: 'Cedar', language: 'en' }
];
this.supportedVoices = [...this.gpt4oMiniVoices];
this.supportsTtsInstructions = true;
// Voice options specific to OpenAI
this.voiceOptions = {
@@ -62,15 +87,6 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
return false;
}
// API key is already loaded in parent initialize() method
// Just check if it's available
if (!this.apiKey) {
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
this.isReady = false;
this.reportProgress(100, 'OpenAI TTS not configured');
return true;
}
// Load preferences
const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
if (preferredVoice) {
@@ -79,12 +95,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
if (preferredModel) {
this.voiceOptions.model = preferredModel;
this.voiceOptions.model = this.normalizeModelId(preferredModel);
}
this.voices = this.getAvailableVoices();
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
if (typeof preferredSpeed === 'number') {
this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
}
// API key is already loaded in parent initialize() method.
// Model and voice preferences still need to be available for the
// options UI even before credentials are configured.
if (!this.apiKey) {
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
this.isReady = false;
this.reportProgress(100, 'OpenAI TTS not configured');
return true;
}
const apiReachable = await this.loadVoices();
@@ -164,9 +193,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
* @returns {Array} - Array of voice objects
*/
getAvailableVoices() {
this.voices = [...this.supportedVoices];
this.voices = this.getVoicesForModel(this.voiceOptions.model);
return this.voices;
}
async getVoices() {
return this.getAvailableVoices();
}
/**
* Generate speech audio data using OpenAI API
@@ -190,6 +223,11 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
response_format: this.voiceOptions.response_format || 'mp3',
speed: this.getApiSpeed(this.voiceOptions.speed)
};
const instructions = this.getRequestInstructions(options);
if (instructions) {
payload.instructions = instructions;
}
// Make API request
const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
@@ -246,17 +284,20 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
if (typeof options.speed === 'number') {
// OpenAI speech speed uses 1.0 as normal. The app-wide slider also
// uses 1.0 as normal, so only clamp at the provider API boundary.
this.voiceOptions.speed = this.getApiSpeed(options.speed);
this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
}
// Handle OpenAI-specific options
if (options.model) {
this.voiceOptions.model = options.model;
this.voiceOptions.model = this.normalizeModelId(options.model);
this.voices = this.getAvailableVoices();
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
// Save the model preference
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager) {
persistenceManager.updatePreference('tts', `${this.id}_model`, options.model);
persistenceManager.updatePreference('tts', `${this.id}_model`, this.voiceOptions.model);
persistenceManager.updatePreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
}
}
@@ -283,7 +324,7 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
normalizeVoiceId(voice) {
const voiceId = this.getVoiceId(voice).toLowerCase();
const supported = new Set(this.supportedVoices.map(item => item.id));
const supported = new Set(this.getVoicesForModel(this.voiceOptions.model).map(item => item.id));
if (supported.has(voiceId)) {
return voiceId;
@@ -296,10 +337,57 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
return 'alloy';
}
normalizeModelId(model) {
const modelId = String(model || '').trim();
const supported = new Set(this.supportedModels.map(item => item.id));
if (supported.has(modelId)) {
return modelId;
}
if (modelId) {
console.warn(`OpenAI TTS: Unsupported model "${modelId}", falling back to tts-1-hd`);
}
return 'tts-1-hd';
}
getVoicesForModel(model) {
const modelId = this.normalizeModelId(model || this.voiceOptions.model);
if (modelId === 'gpt-4o-mini-tts') {
return [...this.gpt4oMiniVoices];
}
return [...this.legacyVoices];
}
getRequestInstructions(options = {}) {
if (this.normalizeModelId(this.voiceOptions.model) !== 'gpt-4o-mini-tts') {
return '';
}
const instructions = Array.isArray(options.ttsInstructions)
? options.ttsInstructions
: [];
const matching = instructions
.filter(entry => {
const provider = String(entry?.provider || '').trim();
return !provider || provider === this.id;
})
.map(entry => String(entry?.instruction || '').trim())
.filter(Boolean);
return matching.length > 0 ? matching[matching.length - 1] : '';
}
getApiSpeed(speed) {
const value = Number.isFinite(speed) ? speed : 1.0;
const value = Number.isFinite(Number(speed)) ? Number(speed) : this.normalizeAppSpeed(speed);
return Math.max(0.25, Math.min(4.0, value));
}
normalizeAppSpeed(speed) {
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1.0;
return Math.max(0.5, Math.min(2.0, value));
}
}
const openAITTSModule = new OpenAITTSModule();
+197 -2
View File
@@ -37,6 +37,8 @@ class OptionsUIModule extends BaseModule {
'createModal',
'populateTtsSystems',
'populateVoices',
'ensureSelectedVoiceIsAvailable',
'updateVoiceControlVisibility',
'populateLanguages',
'loadPreferences',
'createVolumeControl',
@@ -233,10 +235,10 @@ class OptionsUIModule extends BaseModule {
this.elements.ttsSpeed = createUIElement('input', {
type: 'range',
min: 50,
max: 150,
max: 200,
value: 100,
'data-pref-bind': 'tts.speed',
'data-pref-transform': 'centered-speed'
'data-pref-transform': 'multiplier-percent'
}, null, speedContainer);
// Update displayed value when slider changes
@@ -301,6 +303,14 @@ class OptionsUIModule extends BaseModule {
this.elements.ttsVoice = createUIElement('select', {
'data-pref-bind': 'tts.voice'
}, null, ttsVoiceContainer);
this.elements.localOpenAiVoice = createUIElement('input', {
id: 'local-openai-voice',
type: 'text',
placeholder: 'alloy',
'data-pref-bind': 'tts.local-openai-tts_voice'
}, null, ttsVoiceContainer);
this.elements.localOpenAiVoice.style.display = 'none';
ttsSection.appendChild(ttsVoiceContainer);
@@ -503,10 +513,108 @@ class OptionsUIModule extends BaseModule {
}, null, openaiApiUrlContainer);
openaiSettings.appendChild(openaiApiUrlContainer);
const openaiModelContainer = document.createElement('div');
openaiModelContainer.className = 'option-item';
const openaiModelLabel = document.createElement('label');
openaiModelLabel.textContent = this.t('options.model') + ':';
openaiModelContainer.appendChild(openaiModelLabel);
this.elements.openaiModel = createUIElement('select', {
id: 'openai-model',
'data-pref-bind': 'tts.openai-tts_model'
}, null, openaiModelContainer);
[
{ id: 'tts-1', name: 'TTS-1' },
{ id: 'tts-1-hd', name: 'TTS-1 HD' },
{ id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
].forEach(model => {
const option = document.createElement('option');
option.value = model.id;
option.textContent = model.name;
this.elements.openaiModel.appendChild(option);
});
openaiSettings.appendChild(openaiModelContainer);
// Local OpenAI-compatible API settings
const localOpenAiSettings = document.createElement('div');
localOpenAiSettings.className = 'api-settings local-openai-tts-settings';
localOpenAiSettings.style.display = 'none';
const localOpenAiTitle = document.createElement('h3');
localOpenAiTitle.textContent = this.t('options.localOpenAiSettings');
localOpenAiSettings.appendChild(localOpenAiTitle);
const localOpenAiApiKeyContainer = document.createElement('div');
localOpenAiApiKeyContainer.className = 'option-item';
const localOpenAiApiKeyLabel = document.createElement('label');
localOpenAiApiKeyLabel.textContent = this.t('options.optionalApiKey') + ':';
localOpenAiApiKeyContainer.appendChild(localOpenAiApiKeyLabel);
this.elements.localOpenAiApiKey = createUIElement('input', {
type: 'password',
'data-pref-bind': 'tts.local-openai-tts_api_key'
}, null, localOpenAiApiKeyContainer);
localOpenAiSettings.appendChild(localOpenAiApiKeyContainer);
const localOpenAiApiUrlContainer = document.createElement('div');
localOpenAiApiUrlContainer.className = 'option-item';
const localOpenAiApiUrlLabel = document.createElement('label');
localOpenAiApiUrlLabel.textContent = this.t('options.apiUrl') + ':';
localOpenAiApiUrlContainer.appendChild(localOpenAiApiUrlLabel);
this.elements.localOpenAiApiUrl = createUIElement('input', {
type: 'text',
'data-pref-bind': 'tts.local-openai-tts_api_url'
}, null, localOpenAiApiUrlContainer);
localOpenAiSettings.appendChild(localOpenAiApiUrlContainer);
const localOpenAiModelContainer = document.createElement('div');
localOpenAiModelContainer.className = 'option-item';
const localOpenAiModelLabel = document.createElement('label');
localOpenAiModelLabel.textContent = this.t('options.model') + ':';
localOpenAiModelContainer.appendChild(localOpenAiModelLabel);
this.elements.localOpenAiModel = createUIElement('input', {
id: 'local-openai-model',
type: 'text',
placeholder: 'tts-1',
'data-pref-bind': 'tts.local-openai-tts_model'
}, null, localOpenAiModelContainer);
localOpenAiSettings.appendChild(localOpenAiModelContainer);
const localOpenAiTimeoutContainer = document.createElement('div');
localOpenAiTimeoutContainer.className = 'option-item';
const localOpenAiTimeoutLabel = document.createElement('label');
localOpenAiTimeoutLabel.textContent = this.t('options.requestTimeoutMs') + ':';
localOpenAiTimeoutContainer.appendChild(localOpenAiTimeoutLabel);
this.elements.localOpenAiTimeout = createUIElement('input', {
id: 'local-openai-timeout-ms',
type: 'number',
min: 1000,
max: 600000,
step: 1000,
'data-pref-bind': 'tts.local-openai-tts_timeout_ms',
'data-pref-transform': 'integer:1000,600000'
}, null, localOpenAiTimeoutContainer);
localOpenAiSettings.appendChild(localOpenAiTimeoutContainer);
// Add all API settings to container
apiSettings.appendChild(elevenLabsSettings);
apiSettings.appendChild(openaiSettings);
apiSettings.appendChild(localOpenAiSettings);
return apiSettings;
}
@@ -622,6 +730,15 @@ class OptionsUIModule extends BaseModule {
if (!ttsFactory || !this.elements.ttsVoice) return;
const selectedHandler = this.elements.ttsSystem?.value || this.getPreference('tts', 'preferred_handler', 'none');
this.updateVoiceControlVisibility(selectedHandler);
if (selectedHandler === 'local-openai-tts') {
if (this.elements.localOpenAiVoice) {
this.elements.localOpenAiVoice.value = this.getPreference('tts', 'local-openai-tts_voice', 'alloy');
}
return;
}
const voices = typeof ttsFactory.getVoicesForHandler === 'function'
? await ttsFactory.getVoicesForHandler(selectedHandler) || []
: await ttsFactory.getVoices() || [];
@@ -635,6 +752,34 @@ class OptionsUIModule extends BaseModule {
'name',
this.getPreference('tts', `${selectedHandler}_voice`, this.getPreference('tts', 'voice', ''))
);
this.ensureSelectedVoiceIsAvailable(selectedHandler, voices);
}
ensureSelectedVoiceIsAvailable(selectedHandler, voices = []) {
if (!this.elements.ttsVoice || selectedHandler === 'local-openai-tts') return;
if (!Array.isArray(voices) || voices.length === 0) return;
const available = new Set(voices.map(voice => String(voice.id || '').toLowerCase()));
const current = String(this.elements.ttsVoice.value || '').toLowerCase();
if (current && available.has(current)) return;
const fallback = voices.some(voice => voice.id === 'alloy') ? 'alloy' : voices[0].id;
this.elements.ttsVoice.value = fallback;
this.updatePreference('tts', 'voice', fallback);
if (selectedHandler && selectedHandler !== 'none') {
this.updatePreference('tts', `${selectedHandler}_voice`, fallback);
}
}
updateVoiceControlVisibility(selectedHandler) {
const useTextVoice = selectedHandler === 'local-openai-tts';
if (this.elements.ttsVoice) {
this.elements.ttsVoice.style.display = useTextVoice ? 'none' : '';
}
if (this.elements.localOpenAiVoice) {
this.elements.localOpenAiVoice.style.display = useTextVoice ? '' : 'none';
}
}
renderProviderStatuses() {
@@ -698,6 +843,7 @@ class OptionsUIModule extends BaseModule {
// Update API settings visibility based on current TTS system
if (this.elements.ttsSystem) {
this.updateApiSettingsVisibility(this.elements.ttsSystem.value);
this.updateVoiceControlVisibility(this.elements.ttsSystem.value);
}
}
@@ -753,6 +899,36 @@ class OptionsUIModule extends BaseModule {
if (!this.getPreference('tts', 'openai-tts_api_key')) {
this.updatePreference('tts', 'openai-tts_api_key', '');
}
if (!this.getPreference('tts', 'openai-tts_model')) {
this.updatePreference('tts', 'openai-tts_model', 'tts-1-hd');
}
if (this.elements.localOpenAiApiUrl) {
const savedUrl = this.getPreference('tts', 'local-openai-tts_api_url');
const defaultUrl = 'http://localhost:8000/v1';
if (!savedUrl) {
console.log('Options UI: Setting default local OpenAI-compatible API URL:', defaultUrl);
this.updatePreference('tts', 'local-openai-tts_api_url', defaultUrl);
}
}
if (!this.getPreference('tts', 'local-openai-tts_api_key')) {
this.updatePreference('tts', 'local-openai-tts_api_key', '');
}
if (!this.getPreference('tts', 'local-openai-tts_voice')) {
this.updatePreference('tts', 'local-openai-tts_voice', 'alloy');
}
if (!this.getPreference('tts', 'local-openai-tts_model')) {
this.updatePreference('tts', 'local-openai-tts_model', 'tts-1');
}
if (!this.getPreference('tts', 'local-openai-tts_timeout_ms')) {
this.updatePreference('tts', 'local-openai-tts_timeout_ms', 60000);
}
}
/**
@@ -895,6 +1071,7 @@ class OptionsUIModule extends BaseModule {
this.renderProviderStatuses();
});
this.updateApiSettingsVisibility(value);
this.updateVoiceControlVisibility(value);
} else if (key === 'voice') {
ttsFactory.configure({ voice: value });
} else if (key === 'speed') {
@@ -919,6 +1096,24 @@ class OptionsUIModule extends BaseModule {
const provider = key.replace('_api_url', '');
this.dispatchApiChangeEvent('api:urlChanged', provider, 'url', value);
ttsFactory.refreshHandlerStatus(provider).then(() => this.renderProviderStatuses());
} else if (key.endsWith('_voice')) {
const provider = key.replace('_voice', '');
const handler = typeof ttsFactory.getHandler === 'function' ? ttsFactory.getHandler(provider) : null;
if (handler && typeof handler.setVoiceOptions === 'function') {
handler.setVoiceOptions({ voice: value });
}
if (ttsFactory.activeHandler === provider) {
ttsFactory.voice = value;
}
} else if (key.endsWith('_model')) {
const provider = key.replace('_model', '');
const handler = typeof ttsFactory.getHandler === 'function' ? ttsFactory.getHandler(provider) : null;
if (handler && typeof handler.setVoiceOptions === 'function') {
handler.setVoiceOptions({ model: value });
}
if (provider === 'openai-tts') {
this.populateVoices();
}
}
if (key === 'speed' && this.elements.ttsSpeed) {
this.updateSpeedDisplay();
+41 -5
View File
@@ -35,10 +35,20 @@ class PersistenceManagerModule extends BaseModule {
speed: 1.0,
language: 'en_US',
voice: '',
'browser-tts_timeout_ms': 60000,
'kokoro-tts_timeout_ms': 60000,
'elevenlabs-tts_api_key': '',
'elevenlabs-tts_api_url': 'https://api.elevenlabs.io/v1',
'elevenlabs-tts_timeout_ms': 60000,
'openai-tts_api_key': '',
'openai-tts_api_url': 'https://api.openai.com/v1'
'openai-tts_api_url': 'https://api.openai.com/v1',
'openai-tts_model': 'tts-1-hd',
'openai-tts_timeout_ms': 60000,
'local-openai-tts_api_key': '',
'local-openai-tts_api_url': 'http://localhost:8000/v1',
'local-openai-tts_voice': 'alloy',
'local-openai-tts_model': 'tts-1',
'local-openai-tts_timeout_ms': 60000
},
audio: {
masterVolume: 1.0,
@@ -629,13 +639,39 @@ class PersistenceManagerModule extends BaseModule {
// Check if it's a range transformer in format 'range:min,max'
if (element.dataset.prefTransform === 'centered-speed') {
transformer = {
toElement: (value) => Math.round(((Number(value) || 1) * 50) + 50),
toPreference: (value) => Math.max(0.5, Math.min(2.0, (parseInt(value, 10) - 50) / 50))
toElement: (value) => Math.round(Math.max(0.5, Math.min(2.0, Number(value) || 1)) * 100),
toPreference: (value) => {
const percent = parseInt(value, 10);
return Math.max(0.5, Math.min(2.0, (Number.isFinite(percent) ? percent : 100) / 100));
}
};
} else if (element.dataset.prefTransform === 'multiplier-percent') {
transformer = {
toElement: (value) => Math.round((Number(value) || 1) * 100),
toPreference: (value) => Math.max(0.25, Math.min(4.0, parseInt(value, 10) / 100))
toElement: (value) => Math.round(Math.max(0.5, Math.min(2.0, Number(value) || 1)) * 100),
toPreference: (value) => {
const percent = parseInt(value, 10);
return Math.max(0.5, Math.min(2.0, (Number.isFinite(percent) ? percent : 100) / 100));
}
};
} else if (element.dataset.prefTransform.startsWith('integer:')) {
const rangeValues = element.dataset.prefTransform.substring(8).split(',');
const min = Number.parseInt(rangeValues[0], 10);
const max = Number.parseInt(rangeValues[1], 10);
transformer = {
toElement: (value) => Number.parseInt(value, 10),
toPreference: (value) => {
const parsed = Number.parseInt(value, 10);
if (!Number.isFinite(parsed)) {
return Number.isFinite(min) ? min : 0;
}
if (Number.isFinite(min) && parsed < min) {
return min;
}
if (Number.isFinite(max) && parsed > max) {
return max;
}
return parsed;
}
};
} else if (element.dataset.prefTransform.startsWith('range:')) {
const rangeValues = element.dataset.prefTransform.substring(6).split(',');
+46 -8
View File
@@ -45,6 +45,8 @@ class SentenceQueueModule extends BaseModule {
'prepareSpeechMetadata',
'preloadAssetsForItem',
'normalizeTtsText',
'getConfiguredTtsGenerationTimeoutMs',
'normalizeTtsGenerationTimeoutMs',
'runTtsPreloadWithTimeout',
'cancelBlockingGeneration',
'cancelGenerationRequests',
@@ -89,19 +91,25 @@ class SentenceQueueModule extends BaseModule {
const persistenceManager = this.getModule('persistence-manager');
if (persistenceManager && typeof persistenceManager.getPreference === 'function') {
this.autoplay = persistenceManager.getPreference('app', 'autoplay', true) !== false;
this.ttsGenerationTimeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
}
this.addEventListener(document, 'preference-updated', (event) => {
const { category, key, value } = event.detail || {};
if (category === 'app' && key === 'autoplay') {
this.autoplay = value !== false;
}
if (category === 'tts' && (key === 'preferred_handler' || key.endsWith('_timeout_ms'))) {
this.ttsGenerationTimeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
}
});
this.addEventListener(document, 'story:input-mode', (event) => {
this.inputMode = ['text', 'choice', 'end'].includes(event.detail) ? event.detail : 'text';
});
this.addEventListener(document, 'ui:command', (event) => {
if (event.detail?.type === 'continue') {
this.lastContinueAt = performance.now();
if (event.detail?.source !== 'display-clear') {
this.lastContinueAt = performance.now();
}
this.cancelBlockingGeneration('user-fast-forward', {
minWaitMs: USER_CANCEL_BLOCKING_WAIT_MIN_MS
});
@@ -305,11 +313,35 @@ class SentenceQueueModule extends BaseModule {
.trim();
}
getConfiguredTtsGenerationTimeoutMs() {
const persistenceManager = this.getModule('persistence-manager');
if (!persistenceManager || typeof persistenceManager.getPreference !== 'function') {
return TTS_GENERATION_TIMEOUT_MS;
}
const preferredHandler = persistenceManager.getPreference('tts', 'preferred_handler', 'none');
const providerTimeout = preferredHandler && preferredHandler !== 'none'
? persistenceManager.getPreference('tts', `${preferredHandler}_timeout_ms`)
: undefined;
const genericTimeout = persistenceManager.getPreference('tts', 'generation_timeout_ms');
return this.normalizeTtsGenerationTimeoutMs(providerTimeout ?? genericTimeout ?? TTS_GENERATION_TIMEOUT_MS);
}
normalizeTtsGenerationTimeoutMs(value) {
const timeout = Number(value);
if (!Number.isFinite(timeout)) {
return TTS_GENERATION_TIMEOUT_MS;
}
return Math.max(1000, Math.min(600000, Math.round(timeout)));
}
runTtsPreloadWithTimeout(ttsFactory, text, context = {}) {
const sentenceId = context.sentenceId || context.id || `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
const requestId = `${sentenceId}:${context.prefetch ? 'prefetch' : 'blocking'}:${Date.now()}`;
const controller = new AbortController();
const startedAt = performance.now();
const timeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
return new Promise((resolve) => {
let settled = false;
@@ -324,12 +356,12 @@ class SentenceQueueModule extends BaseModule {
const timeoutId = setTimeout(() => {
console.warn('SentenceQueue: TTS generation timed out; continuing without audio', {
sentenceId,
timeoutMs: this.ttsGenerationTimeoutMs,
timeoutMs,
textPreview: text.slice(0, 120)
});
controller.abort('tts-generation-timeout');
finish({ success: false, reason: 'tts_generation_timeout', timedOut: true });
}, this.ttsGenerationTimeoutMs);
}, timeoutMs);
this.generationRequests.set(requestId, {
controller,
@@ -340,7 +372,10 @@ class SentenceQueueModule extends BaseModule {
finish
});
Promise.resolve(ttsFactory.preloadSpeech(text, { signal: controller.signal }))
Promise.resolve(ttsFactory.preloadSpeech(text, {
signal: controller.signal,
ttsInstructions: Array.isArray(context.ttsInstructions) ? context.ttsInstructions : []
}))
.then(result => finish(result || { success: false, reason: 'empty_tts_result' }))
.catch(error => {
if (controller.signal.aborted) {
@@ -426,7 +461,10 @@ class SentenceQueueModule extends BaseModule {
let speedMultiplier = 1.0;
const ttsFactory = this.getModule('tts-factory');
if (ttsFactory) {
speedMultiplier = Number.isFinite(ttsFactory.speed) ? Math.max(0.25, ttsFactory.speed) : 1.0;
const configuredSpeed = Number(ttsFactory.speed);
speedMultiplier = Number.isFinite(configuredSpeed)
? Math.max(0.5, Math.min(2.0, configuredSpeed))
: 1.0;
}
// Calculate estimated duration in milliseconds
@@ -486,6 +524,7 @@ class SentenceQueueModule extends BaseModule {
sentenceId: id,
blockId: metadata.blockId ?? null,
turnId: metadata.turnId ?? null,
ttsInstructions: Array.isArray(metadata.ttsInstructions) ? metadata.ttsInstructions : [],
blocking: true
});
@@ -501,6 +540,7 @@ class SentenceQueueModule extends BaseModule {
paragraphIndex: metadata.paragraphIndex ?? null,
layoutText: metadata.layoutText || text,
glossaryEntries: Array.isArray(metadata.glossaryEntries) ? metadata.glossaryEntries : [],
ttsInstructions: Array.isArray(metadata.ttsInstructions) ? metadata.ttsInstructions : [],
isFirstParagraphInChapter: Boolean(metadata.isFirstParagraphInChapter),
role: metadata.role || (metadata.type === 'heading' ? 'chapter-heading' : 'body'),
dropCap: Boolean(metadata.dropCap),
@@ -753,9 +793,6 @@ class SentenceQueueModule extends BaseModule {
if (this.lastContinueAt >= (sentence.playbackStartedAt || 0)) {
return false;
}
if (this.inputMode === 'choice') {
return false;
}
return this.sentenceQueue.length > 1;
}
@@ -848,6 +885,7 @@ class SentenceQueueModule extends BaseModule {
sentenceId: nextItem.id,
blockId: nextItem.blockId ?? null,
turnId: nextItem.turnId ?? null,
ttsInstructions: Array.isArray(nextItem.ttsInstructions) ? nextItem.ttsInstructions : [],
queueIndex: index,
prefetch: true,
blocking: false
+8 -3
View File
@@ -291,12 +291,13 @@ class SocketClientModule extends BaseModule {
}
}
await this.storeAndQueueBlocks(turnBlocks);
const choices = Array.isArray(data.choices) ? data.choices : [];
const inputMode = data.inputMode || (choices.length > 0 ? 'choice' : 'none');
this.dispatchChoices(choices);
this.dispatchInputMode(inputMode);
await this.storeAndQueueBlocks(turnBlocks);
document.dispatchEvent(new CustomEvent('story:turn-complete', {
detail: { turnId, turn: data, choices, inputMode }
}));
@@ -392,6 +393,9 @@ class SocketClientModule extends BaseModule {
const glossaryEntries = markupParser && typeof markupParser.extractGlossaryTags === 'function'
? markupParser.extractGlossaryTags(tags)
: [];
const ttsInstructions = markupParser && typeof markupParser.extractTtsInstructionTags === 'function'
? markupParser.extractTtsInstructionTags(tags)
: [];
const cueTags = tags.filter(tag => this.isTimedCueTag(tag));
const deferredTags = tags.filter(tag => this.isDeferredPopupTag(tag));
const immediateTags = tags.filter(tag =>
@@ -433,6 +437,7 @@ class SocketClientModule extends BaseModule {
text,
layoutText,
glossaryEntries,
ttsInstructions,
cueMarkers,
deferredTags: [
...(Array.isArray(pending.deferredTags) ? pending.deferredTags : []),
@@ -503,7 +508,7 @@ class SocketClientModule extends BaseModule {
isRenderMetadataTag(tag) {
const key = String(tag?.key || '').toLowerCase();
return ['gloss'].includes(key);
return key === 'gloss' || key === 'tts' || key.startsWith('tts-');
}
isDeferredPopupTag(tag) {
+32 -9
View File
@@ -18,7 +18,8 @@ class TTSFactoryModule extends BaseModule {
'browser-tts', // Browser TTS handler
'kokoro-tts', // Kokoro TTS handler
'elevenlabs-tts',// ElevenLabs TTS handler
'openai-tts' // OpenAI TTS handler
'openai-tts', // OpenAI TTS handler
'local-openai-tts' // Local OpenAI-compatible TTS handler
];
this.handlers = {};
this.initStatus = {};
@@ -356,7 +357,7 @@ class TTSFactoryModule extends BaseModule {
}
// Add placeholder entries for important API handlers that might not be registered yet
const apiHandlerIds = ['elevenlabs-tts', 'openai-tts'];
const apiHandlerIds = ['elevenlabs-tts', 'openai-tts', 'local-openai-tts'];
for (const id of apiHandlerIds) {
// Only add if not already in the list
if (!this.handlers[id] && !availableHandlers.some(h => h.id === id)) {
@@ -407,10 +408,24 @@ class TTSFactoryModule extends BaseModule {
'voice': '', // Empty default - will be selected based on handler
'language': 'en_US', // Legacy stored value; game metadata now owns active TTS language
'volume': 1.0, // Default volume
'browser-tts_timeout_ms': 60000,
'kokoro-tts_timeout_ms': 60000,
'elevenlabs_api_key': '', // Empty API key by default
'elevenlabs_api_url': 'https://api.elevenlabs.io/v1', // Default ElevenLabs API URL
'openai_api_key': '', // Empty API key by default
'openai_api_url': 'https://api.openai.com/v1' // Default OpenAI API URL
'openai_api_url': 'https://api.openai.com/v1', // Default OpenAI API URL
'elevenlabs-tts_api_key': '',
'elevenlabs-tts_api_url': 'https://api.elevenlabs.io/v1',
'elevenlabs-tts_timeout_ms': 60000,
'openai-tts_api_key': '',
'openai-tts_api_url': 'https://api.openai.com/v1',
'openai-tts_model': 'tts-1-hd',
'openai-tts_timeout_ms': 60000,
'local-openai-tts_api_key': '',
'local-openai-tts_api_url': 'http://localhost:8000/v1',
'local-openai-tts_voice': 'alloy',
'local-openai-tts_model': 'tts-1',
'local-openai-tts_timeout_ms': 60000
};
// Ensure all defaults are set in persistence if they don't exist
@@ -475,7 +490,8 @@ class TTSFactoryModule extends BaseModule {
{ id: 'kokoro-tts', displayName: 'Kokoro TTS' },
{ id: 'browser-tts', displayName: 'Browser TTS' },
{ id: 'elevenlabs-tts', displayName: 'ElevenLabs TTS' },
{ id: 'openai-tts', displayName: 'OpenAI TTS' }
{ id: 'openai-tts', displayName: 'OpenAI TTS' },
{ id: 'local-openai-tts', displayName: 'Local OpenAI TTS' }
];
// Register each handler
@@ -780,7 +796,7 @@ class TTSFactoryModule extends BaseModule {
}
// Check if we have this speech cached
const hash = await this.generateSpeechHash(text);
const hash = await this.generateSpeechHash(text, options);
const cached = await this.getCachedSpeech(hash);
if (cached && cached.success) {
@@ -845,7 +861,7 @@ class TTSFactoryModule extends BaseModule {
try {
// Generate a hash for this speech request
const hash = await this.generateSpeechHash(text);
const hash = await this.generateSpeechHash(text, options);
// Check if we have this speech cached
const cached = await this.getCachedSpeech(hash);
@@ -1097,6 +1113,7 @@ class TTSFactoryModule extends BaseModule {
getHandlerStatusMessage(id, handler) {
if (!handler) return 'Not registered';
if (handler.isReady === true) return 'Ready';
if (handler.unsupportedReason) return handler.unsupportedReason;
if (id === 'kokoro-tts') return handler.state === 'INITIALIZING' ? 'Loading model' : 'Not loaded';
if (handler.apiKey === '') return 'API key missing';
if (handler.apiKey && handler.isReady !== true) return 'API unavailable or invalid settings';
@@ -1234,7 +1251,7 @@ class TTSFactoryModule extends BaseModule {
let generationStarted = false;
try {
// Generate a hash for this speech request
hash = await this.generateSpeechHash(text);
hash = await this.generateSpeechHash(text, options);
// Check if we have this audio in cache
const cachedData = await this.getCachedSpeech(hash);
@@ -1286,17 +1303,23 @@ class TTSFactoryModule extends BaseModule {
* @param {string} text - Text to generate hash for
* @returns {Promise<string>} - Hash string
*/
async generateSpeechHash(text) {
async generateSpeechHash(text, options = {}) {
const handler = this.getActiveHandler();
const provider = this.activeHandler || 'none';
const voiceInfo = this.getEffectiveVoiceId(handler);
const model = handler?.voiceOptions?.model || handler?.model || '';
const speed = this.speed || 1.0;
const language = this.language || 'en-us';
const ttsInstruction = handler && typeof handler.getRequestInstructions === 'function'
? handler.getRequestInstructions(options)
: '';
const key = JSON.stringify({
provider,
voice: voiceInfo,
model,
speed,
language,
ttsInstruction,
text
});
@@ -1933,7 +1956,7 @@ class TTSFactoryModule extends BaseModule {
const handler = this.handlers[id];
const isInitialized = !!this.initStatus[id];
const isReady = handler && handler.isReady;
const isApiHandler = ['elevenlabs', 'openai', 'kokoro'].includes(id);
const isApiHandler = ['elevenlabs-tts', 'openai-tts', 'local-openai-tts', 'kokoro-tts'].includes(id);
console.log(`Handler ID: ${id}`);
console.log(` - Handler Exists: ${!!handler}`);
+6 -7
View File
@@ -387,12 +387,12 @@ class UIControllerModule extends BaseModule {
sliderValueFromSpeed(speed) {
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1;
return Math.round((Math.max(0.5, Math.min(2.0, value)) * 50) + 50);
return Math.round(Math.max(0.5, Math.min(2.0, value)) * 100);
}
speedFromSliderValue(value) {
const sliderValue = Number.isFinite(Number(value)) ? Number(value) : 50;
return Math.max(0.5, Math.min(2.0, (sliderValue - 50) / 50));
const sliderValue = Number.isFinite(Number(value)) ? Number(value) : 100;
return Math.max(0.5, Math.min(2.0, sliderValue / 100));
}
bindTopControls() {
@@ -453,14 +453,13 @@ class UIControllerModule extends BaseModule {
if (speedSlider && speedSlider.dataset.uiControllerBound !== 'true') {
speedSlider.dataset.uiControllerBound = 'true';
speedSlider.min = speedSlider.min || '50';
speedSlider.max = speedSlider.max || '150';
speedSlider.min = '50';
speedSlider.max = '200';
speedSlider.addEventListener('input', (event) => {
const persistenceManager = this.getModule('persistence-manager');
const speed = this.speedFromSliderValue(event.target.value);
document.dispatchEvent(new CustomEvent('animation:speed:change', {
detail: { speed: 1 }
detail: { speed }
}));
document.dispatchEvent(new CustomEvent('tts:speed:change', {
+1 -1
View File
@@ -386,7 +386,7 @@ class UIDisplayHandlerModule extends BaseModule {
controls.innerHTML = `
<a id="speech"></a>
<a id="autoplay"></a>
<span><a id="speed_reset"><span id="speed_label"></span></a><input type="range" min="50" max="150" value="100" id="speed" name="speed" /></span>
<span><a id="speed_reset"><span id="speed_label"></span></a><input type="range" min="50" max="200" value="100" id="speed" name="speed" /></span>
<a id="rewind"></a>
<a id="save"></a>
<a id="reload" disabled="disabled"></a>
+4
View File
@@ -47,8 +47,12 @@
"options.enableMusicDucking": "Musikabsenkung einschalten",
"options.elevenLabsSettings": "ElevenLabs API-Einstellungen",
"options.openAiSettings": "OpenAI API-Einstellungen",
"options.localOpenAiSettings": "Lokale OpenAI API-Einstellungen",
"options.optionalApiKey": "API-Schlüssel (optional)",
"options.apiKey": "API-Schlüssel",
"options.apiUrl": "API-URL",
"options.model": "Modell",
"options.requestTimeoutMs": "Anfrage-Timeout (ms)",
"credits.button": "Credits",
"credits.buttonTitle": "Mitwirkende und Lizenzen anzeigen",
"credits.title": "Mitwirkende und Lizenzen",
+4
View File
@@ -47,8 +47,12 @@
"options.enableMusicDucking": "Enable music ducking",
"options.elevenLabsSettings": "ElevenLabs API Settings",
"options.openAiSettings": "OpenAI API Settings",
"options.localOpenAiSettings": "Local OpenAI API Settings",
"options.apiKey": "API Key",
"options.optionalApiKey": "API Key (optional)",
"options.apiUrl": "API URL",
"options.model": "Model",
"options.requestTimeoutMs": "Request timeout (ms)",
"credits.button": "credits",
"credits.buttonTitle": "Show credits and third-party licenses",
"credits.title": "Credits and Licenses",
+38 -2
View File
@@ -23,13 +23,14 @@
<div class="option-item">
<label>Voice:</label>
<select id="tts-voice" data-pref-bind="tts.voice"></select>
<input type="text" id="local-openai-voice" data-pref-bind="tts.local-openai-tts_voice" placeholder="alloy" style="display: none;">
</div>
<div class="option-item">
<label>Speech:</label>
<span class="slider-value">100%</span>
<input type="range" id="tts-speed" min="50" max="200" value="100"
data-pref-bind="app.speed" data-pref-transform="range:0.5,2.0">
data-pref-bind="tts.speed" data-pref-transform="multiplier-percent">
</div>
<!-- API Settings -->
@@ -50,7 +51,7 @@
</div>
<!-- OpenAI Settings -->
<div class="api-settings openai-settings" style="display: none;">
<div class="api-settings openai-tts-settings" style="display: none;">
<h3>OpenAI API Settings</h3>
<div class="option-item">
@@ -62,6 +63,41 @@
<label>API URL:</label>
<input type="text" id="openai-api-url" data-pref-bind="tts.openai-tts_api_url">
</div>
<div class="option-item">
<label>Model:</label>
<select id="openai-model" data-pref-bind="tts.openai-tts_model">
<option value="tts-1">TTS-1</option>
<option value="tts-1-hd">TTS-1 HD</option>
<option value="gpt-4o-mini-tts">GPT-4o mini TTS</option>
</select>
</div>
</div>
<!-- Local OpenAI-compatible Settings -->
<div class="api-settings local-openai-tts-settings" style="display: none;">
<h3>Local OpenAI API Settings</h3>
<div class="option-item">
<label>API Key (optional):</label>
<input type="password" id="local-openai-api-key" data-pref-bind="tts.local-openai-tts_api_key">
</div>
<div class="option-item">
<label>API URL:</label>
<input type="text" id="local-openai-api-url" data-pref-bind="tts.local-openai-tts_api_url">
</div>
<div class="option-item">
<label>Model:</label>
<input type="text" id="local-openai-model" data-pref-bind="tts.local-openai-tts_model" placeholder="tts-1">
</div>
<div class="option-item">
<label>Request timeout (ms):</label>
<input type="number" id="local-openai-timeout-ms" min="1000" max="600000" step="1000"
data-pref-bind="tts.local-openai-tts_timeout_ms" data-pref-transform="integer:1000,600000">
</div>
</div>
</div>
</div>