Update TTS providers and story markup
This commit is contained in:
@@ -1668,7 +1668,8 @@ body:not([data-game-running="true"]) #start_prompt {
|
||||
.modal-footer button,
|
||||
.option-item input[type="text"],
|
||||
.option-item input[type="password"],
|
||||
.option-item input[type="url"] {
|
||||
.option-item input[type="url"],
|
||||
.option-item input[type="number"] {
|
||||
background-color: transparent;
|
||||
border: 1px solid var(--panel-border);
|
||||
border-radius: var(--control-radius);
|
||||
@@ -1684,7 +1685,8 @@ body:not([data-game-running="true"]) #start_prompt {
|
||||
|
||||
.option-item input[type="text"],
|
||||
.option-item input[type="password"],
|
||||
.option-item input[type="url"] {
|
||||
.option-item input[type="url"],
|
||||
.option-item input[type="number"] {
|
||||
box-sizing: border-box;
|
||||
width: min(18rem, 60%);
|
||||
padding: 0.3rem 0.5rem;
|
||||
@@ -1692,7 +1694,8 @@ body:not([data-game-running="true"]) #start_prompt {
|
||||
|
||||
.option-item input[type="text"]:focus,
|
||||
.option-item input[type="password"]:focus,
|
||||
.option-item input[type="url"]:focus {
|
||||
.option-item input[type="url"]:focus,
|
||||
.option-item input[type="number"]:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 0 2px rgba(90, 57, 33, 0.14);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ class AnimationQueueModule extends BaseModule {
|
||||
|
||||
// Animation timing properties - use parent's config system
|
||||
this.updateConfig({
|
||||
speed: 1.0, // Speed multiplier for delays (1.0 = no scaling, delays are pre-calculated)
|
||||
speed: 1.0,
|
||||
fastForwardEnabled: false
|
||||
});
|
||||
|
||||
@@ -44,7 +44,9 @@ class AnimationQueueModule extends BaseModule {
|
||||
// Listen for speed changes from UI
|
||||
document.addEventListener('animation:speed:change', (event) => {
|
||||
if (event.detail && typeof event.detail.speed === 'number') {
|
||||
// Speed from UI is a rate multiplier (0.5-2.0 typically)
|
||||
// Word timings are already speed-scaled before they reach
|
||||
// the scheduler. Keep the value only for diagnostics/API
|
||||
// compatibility; do not apply it again in schedule().
|
||||
this.config.speed = event.detail.speed;
|
||||
console.log(`AnimationQueue: Speed updated to ${this.config.speed}`);
|
||||
}
|
||||
@@ -71,8 +73,9 @@ class AnimationQueueModule extends BaseModule {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Adjust delay based on fast-forward or speed settings
|
||||
const actualDelay = this.config.fastForwardEnabled ? 0 : Math.max(0, delay * this.config.speed);
|
||||
// Delays are absolute timings calculated from the prepared sentence
|
||||
// duration. TTS/app speed has already been applied at that stage.
|
||||
const actualDelay = this.config.fastForwardEnabled ? 0 : Math.max(0, delay);
|
||||
|
||||
// Record the delay for tracking
|
||||
this.delay = Math.max(this.delay, delay);
|
||||
@@ -318,7 +321,7 @@ class AnimationQueueModule extends BaseModule {
|
||||
|
||||
/**
|
||||
* Set the animation speed
|
||||
* @param {number} speed - Animation speed factor (lower is faster)
|
||||
* @param {number} speed - Stored speed value for compatibility/diagnostics
|
||||
*/
|
||||
setSpeed(speed) {
|
||||
if (typeof speed !== 'number' || speed <= 0) {
|
||||
|
||||
@@ -27,7 +27,7 @@ export class BrowserTTSModule extends TTSHandlerModule {
|
||||
this.currentUtterance = null;
|
||||
|
||||
// Bind additional methods
|
||||
this.bindMethods(['handleVoicePreferenceChanged']);
|
||||
this.bindMethods(['handleVoicePreferenceChanged', 'estimateSpeechDuration']);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -368,26 +368,29 @@ export class BrowserTTSModule extends TTSHandlerModule {
|
||||
|
||||
// Set up event handlers
|
||||
utterance.onstart = this.utteranceHandlers.start;
|
||||
utterance.onend = () => {
|
||||
this.utteranceHandlers.end();
|
||||
if (callback) {
|
||||
callback({ success: true });
|
||||
}
|
||||
};
|
||||
utterance.onerror = (event) => {
|
||||
this.utteranceHandlers.error(event);
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'synthesis_error', error: event });
|
||||
}
|
||||
};
|
||||
utterance.onpause = this.utteranceHandlers.pause;
|
||||
utterance.onresume = this.utteranceHandlers.resume;
|
||||
|
||||
// Start speaking
|
||||
this.currentUtterance = utterance;
|
||||
speechSynthesis.speak(utterance);
|
||||
|
||||
return true;
|
||||
|
||||
return new Promise(resolve => {
|
||||
utterance.onend = () => {
|
||||
this.utteranceHandlers.end();
|
||||
if (callback) {
|
||||
callback({ success: true });
|
||||
}
|
||||
resolve(true);
|
||||
};
|
||||
utterance.onerror = (event) => {
|
||||
this.utteranceHandlers.error(event);
|
||||
if (callback) {
|
||||
callback({ success: false, reason: 'synthesis_error', error: event });
|
||||
}
|
||||
resolve(false);
|
||||
};
|
||||
speechSynthesis.speak(utterance);
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Browser TTS: Failed to speak:', error);
|
||||
if (callback) {
|
||||
@@ -469,7 +472,7 @@ export class BrowserTTSModule extends TTSHandlerModule {
|
||||
|
||||
if (typeof options.speed === 'number') {
|
||||
// Web Speech rate uses 1.0 as normal, matching the app-wide slider.
|
||||
this.voiceOptions.speed = Math.max(0.1, Math.min(10.0, options.speed));
|
||||
this.voiceOptions.speed = Math.max(0.5, Math.min(2.0, options.speed));
|
||||
}
|
||||
|
||||
if (typeof options.pitch === 'number') {
|
||||
@@ -494,8 +497,23 @@ export class BrowserTTSModule extends TTSHandlerModule {
|
||||
* @returns {Promise<Object>} - Promise that resolves to a result object with a `success` flag (plus `reason` on failure)
|
||||
*/
|
||||
async preloadSpeech(text) {
|
||||
// Browser TTS can't preload speech
|
||||
return { success: false, reason: 'not_supported' };
|
||||
if (!this.isReady || !text) {
|
||||
return { success: false, reason: 'not_ready_or_empty_text' };
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
text,
|
||||
duration: this.estimateSpeechDuration(text),
|
||||
directPlayback: true
|
||||
};
|
||||
}
|
||||
|
||||
estimateSpeechDuration(text) {
|
||||
const processedText = this.preprocessText(text);
|
||||
const charactersPerSecond = 12;
|
||||
const speed = Math.max(0.5, Math.min(2.0, Number(this.voiceOptions.speed) || 1.0));
|
||||
return Math.max((processedText.length / (charactersPerSecond * speed)) * 1000, 800);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -35,6 +35,9 @@ class ChoiceDisplayModule extends BaseModule {
|
||||
'render',
|
||||
'clear',
|
||||
'normalizeChoices',
|
||||
'orderChoicesForPresentation',
|
||||
'shuffleChoices',
|
||||
'randomInt',
|
||||
'assignLetters',
|
||||
'selectChoice',
|
||||
'getTagValue',
|
||||
@@ -137,7 +140,7 @@ class ChoiceDisplayModule extends BaseModule {
|
||||
}
|
||||
|
||||
normalizeChoices(choices) {
|
||||
return this.assignLetters(choices.slice(0, 36).map((choice, order) => {
|
||||
const normalized = choices.slice(0, 36).map((choice, order) => {
|
||||
const tags = Array.isArray(choice.tags) ? choice.tags : [];
|
||||
const category = choice.category || this.getTagValue(tags, 'action');
|
||||
return {
|
||||
@@ -145,11 +148,64 @@ class ChoiceDisplayModule extends BaseModule {
|
||||
text: String(choice.text || ''),
|
||||
tags,
|
||||
category,
|
||||
sourceOrder: order,
|
||||
optional: this.hasTag(tags, 'optional'),
|
||||
letter: '',
|
||||
templateCell: this.getTemplateCell({ ...choice, tags, category })
|
||||
};
|
||||
}));
|
||||
});
|
||||
|
||||
return this.assignLetters(this.orderChoicesForPresentation(normalized));
|
||||
}
|
||||
|
||||
orderChoicesForPresentation(choices) {
|
||||
const groupOrder = [];
|
||||
const grouped = new Map();
|
||||
const ungrouped = [];
|
||||
|
||||
choices.forEach((choice) => {
|
||||
const group = String(choice.category || '').trim();
|
||||
if (!group) {
|
||||
ungrouped.push(choice);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!grouped.has(group)) {
|
||||
grouped.set(group, []);
|
||||
groupOrder.push(group);
|
||||
}
|
||||
grouped.get(group).push(choice);
|
||||
});
|
||||
|
||||
const ordered = [];
|
||||
groupOrder.forEach((group) => {
|
||||
ordered.push(...this.shuffleChoices(grouped.get(group) || []));
|
||||
});
|
||||
|
||||
if (ungrouped.length > 0) {
|
||||
ordered.push(...this.shuffleChoices(ungrouped));
|
||||
}
|
||||
|
||||
return ordered;
|
||||
}
|
||||
|
||||
shuffleChoices(choices) {
|
||||
const shuffled = choices.slice();
|
||||
for (let index = shuffled.length - 1; index > 0; index -= 1) {
|
||||
const swapIndex = this.randomInt(index + 1);
|
||||
[shuffled[index], shuffled[swapIndex]] = [shuffled[swapIndex], shuffled[index]];
|
||||
}
|
||||
return shuffled;
|
||||
}
|
||||
|
||||
randomInt(exclusiveMax) {
|
||||
const max = Math.max(1, Number(exclusiveMax) || 1);
|
||||
if (window.crypto && typeof window.crypto.getRandomValues === 'function') {
|
||||
const values = new Uint32Array(1);
|
||||
window.crypto.getRandomValues(values);
|
||||
return values[0] % max;
|
||||
}
|
||||
return Math.floor(Math.random() * max);
|
||||
}
|
||||
|
||||
assignLetters(choices) {
|
||||
|
||||
@@ -75,7 +75,7 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
|
||||
|
||||
const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
|
||||
if (typeof preferredSpeed === 'number') {
|
||||
this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
|
||||
this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
|
||||
}
|
||||
|
||||
this.isReady = true;
|
||||
@@ -255,7 +255,7 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
|
||||
}
|
||||
|
||||
if (typeof options.speed === 'number') {
|
||||
this.voiceOptions.speed = this.getApiSpeed(options.speed);
|
||||
this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
|
||||
}
|
||||
|
||||
// Handle ElevenLabs-specific options
|
||||
@@ -271,7 +271,17 @@ export class ElevenLabsTTSModule extends ApiTTSModuleBase {
|
||||
}
|
||||
|
||||
getApiSpeed(speed) {
|
||||
return Math.max(0.7, Math.min(1.2, Number.isFinite(speed) ? speed : 1.0));
|
||||
const appSpeed = this.normalizeAppSpeed(speed);
|
||||
if (appSpeed <= 1.0) {
|
||||
return 0.7 + ((appSpeed - 0.5) / 0.5) * 0.3;
|
||||
}
|
||||
|
||||
return 1.0 + (appSpeed - 1.0) * 0.2;
|
||||
}
|
||||
|
||||
normalizeAppSpeed(speed) {
|
||||
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1.0;
|
||||
return Math.max(0.5, Math.min(2.0, value));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -210,9 +210,21 @@ class GameLoopModule extends BaseModule {
|
||||
return false;
|
||||
}
|
||||
|
||||
await this.resetClientPlaybackAndDisplay();
|
||||
this.currentChoices = [];
|
||||
this.currentInputMode = 'none';
|
||||
document.dispatchEvent(new CustomEvent('story:choices', { detail: [] }));
|
||||
document.dispatchEvent(new CustomEvent('story:input-mode', { detail: 'none' }));
|
||||
document.dispatchEvent(new CustomEvent('story:history-restoring', {
|
||||
detail: { active: true, reason: 'autosave-reconnect-prepare' }
|
||||
}));
|
||||
|
||||
const response = await socketClient.resumeGame(browserSave.inkState);
|
||||
if (!response?.success) {
|
||||
console.warn('GameLoop: autosave resume failed', response);
|
||||
document.dispatchEvent(new CustomEvent('story:history-restoring', {
|
||||
detail: { active: false, reason: 'autosave-reconnect-failed' }
|
||||
}));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -222,7 +234,7 @@ class GameLoopModule extends BaseModule {
|
||||
this.gameState.canSave = this.gameState.started;
|
||||
this.gameState.canLoad = true;
|
||||
this.updateUIState();
|
||||
await this.restoreBrowserSave(browserSave, 'autosave-resume', { resetDisplay: true });
|
||||
await this.restoreBrowserSave(browserSave, 'autosave-resume', { resetDisplay: false });
|
||||
this.restoreInputStateFromSave(browserSave, 'autosave-resume');
|
||||
return true;
|
||||
}
|
||||
@@ -281,6 +293,14 @@ class GameLoopModule extends BaseModule {
|
||||
const storyHistory = this.getModule('story-history');
|
||||
if (storyHistory && typeof storyHistory.startNewGame === 'function') {
|
||||
await storyHistory.startNewGame();
|
||||
if (typeof storyHistory.saveSlot === 'function') {
|
||||
await storyHistory.saveSlot(this.autoSaveSlot, {
|
||||
inkState: null,
|
||||
choices: [],
|
||||
inputMode: 'none',
|
||||
running: false
|
||||
});
|
||||
}
|
||||
}
|
||||
const response = await socketClient.newGame();
|
||||
if (!response?.success) {
|
||||
@@ -296,6 +316,15 @@ class GameLoopModule extends BaseModule {
|
||||
this.gameState.canSave = true;
|
||||
this.gameState.canLoad = Boolean(response.canLoad);
|
||||
this.updateUIState();
|
||||
if (response.savedState && storyHistory && typeof storyHistory.saveSlot === 'function') {
|
||||
await storyHistory.saveSlot(this.autoSaveSlot, {
|
||||
inkState: response.savedState,
|
||||
choices: [],
|
||||
inputMode: 'none',
|
||||
running: true
|
||||
});
|
||||
this.lastInkState = response.savedState;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -20,6 +20,7 @@ export class KokoroTTSModule extends TTSHandlerModule {
|
||||
this.lastProgressTime = null;
|
||||
this.lastProgressValue = null;
|
||||
this.modelLoaded = false;
|
||||
this.unsupportedReason = '';
|
||||
|
||||
// Options for playback
|
||||
this.options = {
|
||||
@@ -37,7 +38,8 @@ export class KokoroTTSModule extends TTSHandlerModule {
|
||||
'pause',
|
||||
'resume',
|
||||
'getDefaultVoices',
|
||||
'setVoiceOptions'
|
||||
'setVoiceOptions',
|
||||
'supportsGameLanguage'
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -59,6 +61,18 @@ export class KokoroTTSModule extends TTSHandlerModule {
|
||||
return false;
|
||||
}
|
||||
|
||||
const gameConfig = this.getModule('game-config');
|
||||
const gameLanguage = gameConfig?.getLocale?.() || 'en_US';
|
||||
if (!this.supportsGameLanguage(gameLanguage)) {
|
||||
this.voices = [];
|
||||
this.isReady = false;
|
||||
this.unsupportedReason = `Kokoro TTS supports English and Chinese only; game language is ${gameLanguage}`;
|
||||
this.reportProgress(100, 'Kokoro TTS disabled for this language');
|
||||
console.log(`Kokoro TTS: ${this.unsupportedReason}`);
|
||||
return true;
|
||||
}
|
||||
this.unsupportedReason = '';
|
||||
|
||||
this.addEventListener(document, 'preference-updated', (event) => {
|
||||
const { category, key } = event.detail || {};
|
||||
if (category === 'audio' && ['masterVolume', 'ttsVolume', 'masterVolumeEnabled', 'ttsVolumeEnabled'].includes(key) && this.currentAudio) {
|
||||
@@ -388,12 +402,27 @@ export class KokoroTTSModule extends TTSHandlerModule {
|
||||
|
||||
return Math.max(0, Math.min(1, this.options.volume * (masterEnabled ? masterVolume : 0) * (ttsEnabled ? ttsVolume : 0)));
|
||||
}
|
||||
|
||||
supportsGameLanguage(language) {
|
||||
const normalized = String(language || '').trim().replace('_', '-').toLowerCase();
|
||||
const languageCode = normalized.split('-')[0];
|
||||
return languageCode === 'en'
|
||||
|| languageCode === 'english'
|
||||
|| languageCode === 'zh'
|
||||
|| languageCode === 'chinese'
|
||||
|| languageCode === 'cmn'
|
||||
|| languageCode === 'yue';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get available voices
|
||||
* @returns {Array} - Array of voice objects
|
||||
*/
|
||||
async getVoices() {
|
||||
if (this.unsupportedReason) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// If no voices are loaded yet, return default voices
|
||||
if (!this.voices || this.voices.length === 0) {
|
||||
return this.getDefaultVoices();
|
||||
|
||||
@@ -27,6 +27,12 @@ class LayoutRendererModule extends BaseModule {
|
||||
'decorateInlineWord',
|
||||
'applyGlossaryEntries',
|
||||
'normalizeGlossaryText',
|
||||
'normalizeGlossaryToken',
|
||||
'normalizeGlossaryCompact',
|
||||
'buildGlossaryTermPatterns',
|
||||
'buildCompactGlossaryTermPatterns',
|
||||
'decorateGlossarySegment',
|
||||
'decorateGlossaryRange',
|
||||
'decorateGlossaryWord',
|
||||
'ensureGlossaryTooltip',
|
||||
'showGlossaryTooltip',
|
||||
@@ -337,34 +343,56 @@ class LayoutRendererModule extends BaseModule {
|
||||
|
||||
let cursor = 0;
|
||||
const segments = [];
|
||||
let compactCursor = 0;
|
||||
const compactSegments = [];
|
||||
const fullText = words.map((word, index) => {
|
||||
if (index > 0) cursor += 1;
|
||||
const start = cursor;
|
||||
cursor += word.text.length;
|
||||
segments.push({ ...word, start, end: cursor });
|
||||
|
||||
const compactText = this.normalizeGlossaryCompact(word.text);
|
||||
if (compactText) {
|
||||
const compactStart = compactCursor;
|
||||
compactCursor += compactText.length;
|
||||
compactSegments.push({ ...word, start: compactStart, end: compactCursor });
|
||||
}
|
||||
|
||||
return word.text;
|
||||
}).join(' ');
|
||||
const compactFullText = words.map(word => this.normalizeGlossaryCompact(word.text)).join('');
|
||||
|
||||
entries
|
||||
.filter(entry => entry && entry.term && entry.definition)
|
||||
.forEach(entry => {
|
||||
const normalizedTerm = this.normalizeGlossaryText(entry.term);
|
||||
if (!normalizedTerm) return;
|
||||
|
||||
const matcher = new RegExp(`(^|\\s)(${this.escapeRegExp(normalizedTerm)})(?=\\s|$|[.,;:!?])`, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(fullText)) !== null) {
|
||||
const matchStart = match.index + match[1].length;
|
||||
const matchEnd = matchStart + match[2].length;
|
||||
segments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossaryWord(segment.element, entry));
|
||||
}
|
||||
this.buildGlossaryTermPatterns(entry.term).forEach((pattern) => {
|
||||
const matcher = new RegExp(`(^|\\s)(${pattern})(?=\\s|$|[.,;:!?])`, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(fullText)) !== null) {
|
||||
const matchStart = match.index + match[1].length;
|
||||
const matchEnd = matchStart + match[2].length;
|
||||
segments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'text'));
|
||||
}
|
||||
});
|
||||
this.buildCompactGlossaryTermPatterns(entry.term).forEach((pattern) => {
|
||||
const matcher = new RegExp(pattern, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(compactFullText)) !== null) {
|
||||
const matchStart = match.index;
|
||||
const matchEnd = matchStart + match[0].length;
|
||||
compactSegments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'compact'));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
normalizeGlossaryText(text) {
|
||||
return String(text || '')
|
||||
.normalize('NFC')
|
||||
.replace(/\u200c/g, '')
|
||||
.replace(/\u00ad/g, '')
|
||||
.replace(/-\s*$/g, '')
|
||||
@@ -372,6 +400,157 @@ class LayoutRendererModule extends BaseModule {
|
||||
.trim();
|
||||
}
|
||||
|
||||
normalizeGlossaryToken(text) {
|
||||
return this.normalizeGlossaryText(text)
|
||||
.replace(/^[.,;:!?()[\]{}"'„“”‚‘’»«]+|[.,;:!?()[\]{}"'„“”‚‘’»«]+$/g, '');
|
||||
}
|
||||
|
||||
normalizeGlossaryCompact(text) {
|
||||
return this.normalizeGlossaryToken(text)
|
||||
.replace(/[-\s]+/g, '')
|
||||
.replace(/[.,;:!?()[\]{}"'„“”‚‘’»«]+/g, '');
|
||||
}
|
||||
|
||||
buildGlossaryTermPatterns(term) {
|
||||
const normalizedTerm = this.normalizeGlossaryText(term);
|
||||
if (!normalizedTerm) return [];
|
||||
|
||||
const exact = normalizedTerm
|
||||
.split(/\s+/)
|
||||
.map(token => this.escapeRegExp(this.normalizeGlossaryToken(token)))
|
||||
.filter(Boolean)
|
||||
.join('\\s+');
|
||||
if (!exact) return [];
|
||||
|
||||
const inflected = normalizedTerm
|
||||
.split(/\s+/)
|
||||
.map((token, index, tokens) => {
|
||||
const normalized = this.normalizeGlossaryToken(token);
|
||||
if (!normalized) return '';
|
||||
const escaped = this.escapeRegExp(normalized);
|
||||
const isLast = index === tokens.length - 1;
|
||||
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
|
||||
})
|
||||
.filter(Boolean)
|
||||
.join('\\s+');
|
||||
|
||||
return [...new Set([exact, inflected])];
|
||||
}
|
||||
|
||||
buildCompactGlossaryTermPatterns(term) {
|
||||
const tokens = this.normalizeGlossaryText(term)
|
||||
.split(/\s+/)
|
||||
.map(token => this.normalizeGlossaryCompact(token))
|
||||
.filter(Boolean);
|
||||
if (tokens.length === 0) return [];
|
||||
|
||||
const exact = tokens.map(token => this.escapeRegExp(token)).join('');
|
||||
const inflected = tokens
|
||||
.map((token, index) => {
|
||||
const escaped = this.escapeRegExp(token);
|
||||
const isLast = index === tokens.length - 1;
|
||||
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
|
||||
})
|
||||
.join('');
|
||||
|
||||
return [...new Set([exact, inflected])];
|
||||
}
|
||||
|
||||
decorateGlossarySegment(segment, entry, matchStart, matchEnd, mode = 'text') {
|
||||
if (!segment?.element || !entry?.definition) return;
|
||||
|
||||
const localStart = Math.max(0, matchStart - segment.start);
|
||||
const localEnd = Math.min(segment.end - segment.start, matchEnd - segment.start);
|
||||
if (localEnd <= localStart) return;
|
||||
|
||||
const segmentLength = mode === 'compact'
|
||||
? this.normalizeGlossaryCompact(segment.text).length
|
||||
: segment.text.length;
|
||||
|
||||
if (localStart <= 0 && localEnd >= segmentLength) {
|
||||
this.decorateGlossaryWord(segment.element, entry);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mode === 'compact') {
|
||||
return;
|
||||
}
|
||||
|
||||
this.decorateGlossaryRange(segment.element, entry, localStart, localEnd);
|
||||
}
|
||||
|
||||
decorateGlossaryRange(word, entry, start, end) {
|
||||
if (!word || !entry?.definition) return;
|
||||
|
||||
const text = word.textContent || '';
|
||||
const safeStart = Math.max(0, Math.min(text.length, start));
|
||||
const safeEnd = Math.max(safeStart, Math.min(text.length, end));
|
||||
if (safeStart === 0 && safeEnd >= text.length) {
|
||||
this.decorateGlossaryWord(word, entry);
|
||||
return;
|
||||
}
|
||||
if (safeEnd <= safeStart) return;
|
||||
|
||||
word.dataset.glossaryPartial = 'true';
|
||||
|
||||
const textNodes = [];
|
||||
const filter = window.NodeFilter || NodeFilter;
|
||||
const walker = document.createTreeWalker(word, filter.SHOW_TEXT);
|
||||
let node;
|
||||
while ((node = walker.nextNode())) {
|
||||
textNodes.push(node);
|
||||
}
|
||||
|
||||
let offset = 0;
|
||||
textNodes.forEach((textNode) => {
|
||||
const nodeText = textNode.nodeValue || '';
|
||||
const nodeStart = offset;
|
||||
const nodeEnd = nodeStart + nodeText.length;
|
||||
offset = nodeEnd;
|
||||
|
||||
const overlapStart = Math.max(safeStart, nodeStart);
|
||||
const overlapEnd = Math.min(safeEnd, nodeEnd);
|
||||
if (overlapEnd <= overlapStart || !textNode.parentNode) return;
|
||||
|
||||
const localStart = overlapStart - nodeStart;
|
||||
const localEnd = overlapEnd - nodeStart;
|
||||
const before = nodeText.slice(0, localStart);
|
||||
const matched = nodeText.slice(localStart, localEnd);
|
||||
const after = nodeText.slice(localEnd);
|
||||
const parent = textNode.parentNode;
|
||||
|
||||
if (before) {
|
||||
parent.insertBefore(document.createTextNode(before), textNode);
|
||||
}
|
||||
|
||||
if (matched) {
|
||||
const gloss = document.createElement('span');
|
||||
gloss.textContent = matched;
|
||||
this.decorateGlossaryWord(gloss, entry);
|
||||
parent.insertBefore(gloss, textNode);
|
||||
}
|
||||
|
||||
if (after) {
|
||||
parent.insertBefore(document.createTextNode(after), textNode);
|
||||
}
|
||||
|
||||
parent.removeChild(textNode);
|
||||
});
|
||||
|
||||
if (textNodes.length === 0) {
|
||||
const before = text.slice(0, safeStart);
|
||||
const matched = text.slice(safeStart, safeEnd);
|
||||
const after = text.slice(safeEnd);
|
||||
word.textContent = '';
|
||||
if (before) word.appendChild(document.createTextNode(before));
|
||||
const gloss = document.createElement('span');
|
||||
gloss.textContent = matched;
|
||||
this.decorateGlossaryWord(gloss, entry);
|
||||
word.appendChild(gloss);
|
||||
if (after) word.appendChild(document.createTextNode(after));
|
||||
}
|
||||
}
|
||||
|
||||
decorateGlossaryWord(word, entry) {
|
||||
if (!word || !entry?.definition) return;
|
||||
word.classList.add('story-glossary-word');
|
||||
|
||||
@@ -122,6 +122,7 @@ const ModuleLoader = (function() {
|
||||
{ id: 'browser-tts', script: '/js/browser-tts-module.js', weight: 12 },
|
||||
{ id: 'elevenlabs-tts', script: '/js/elevenlabs-tts-module.js', weight: 12 },
|
||||
{ id: 'openai-tts', script: '/js/openai-tts-module.js', weight: 12 },
|
||||
{ id: 'local-openai-tts', script: '/js/local-openai-tts-module.js', weight: 12 },
|
||||
{ id: 'tts-factory', script: '/js/tts-factory-module.js', weight: 13 }, // TTSFactory must be loaded before TTSPlayer
|
||||
|
||||
// UI and interaction modules
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
/**
|
||||
* LocalOpenAITTSModule
|
||||
* Provides TTS via local or self-hosted OpenAI-compatible /audio/speech APIs.
|
||||
*/
|
||||
import { ApiTTSModuleBase } from './api-tts-module-base.js';
|
||||
|
||||
export class LocalOpenAITTSModule extends ApiTTSModuleBase {
    constructor() {
        super('local-openai-tts', 'Local OpenAI TTS');

        // Default request options; initialize() overrides them with stored
        // preferences where present.
        this.voiceOptions = {
            voice: 'alloy',
            model: 'tts-1',
            speed: 1.0,
            response_format: 'mp3'
        };
        this.voices = [];
    }

    /**
     * @returns {string} Base URL used when no API URL has been configured
     */
    getDefaultApiBaseUrl() {
        return 'http://localhost:8000/v1';
    }

    /**
     * Whether everything needed to issue a speech request is present: a
     * base URL, a voice and a model. An API key is optional.
     *
     * @returns {boolean}
     */
    hasRequiredSettings() {
        return Boolean(this.apiBaseUrl && this.voiceOptions.voice && this.voiceOptions.model);
    }

    /**
     * Initialize the module: run the parent initialization, then load the
     * voice, model, response format and speed from stored preferences.
     *
     * @returns {Promise<boolean>} False when the parent initialization
     *   fails, the persistence manager is missing or an error is thrown.
     *   True otherwise, even when the module ends up not ready.
     */
    async initialize() {
        try {
            this.reportProgress(10, 'Initializing Local OpenAI TTS');

            const parentInit = await super.initialize();
            if (!parentInit) {
                console.error('Local OpenAI TTS: Parent initialization failed');
                return false;
            }

            const persistenceManager = this.getModule('persistence-manager');
            if (!persistenceManager) {
                console.error('Local OpenAI TTS: Required dependency persistence-manager not found');
                return false;
            }

            const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
            if (preferredVoice) {
                this.voiceOptions.voice = this.normalizeTextOption(preferredVoice, this.voiceOptions.voice);
            }

            const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
            if (preferredModel) {
                this.voiceOptions.model = this.normalizeTextOption(preferredModel, this.voiceOptions.model);
            }

            const preferredFormat = persistenceManager.getPreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
            if (preferredFormat) {
                this.voiceOptions.response_format = this.normalizeResponseFormat(preferredFormat);
            }

            // The speed preference is stored under the shared 'speed' key
            // rather than a provider-specific one.
            const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
            if (typeof preferredSpeed === 'number') {
                this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
            }

            this.isReady = this.hasRequiredSettings();
            this.reportProgress(100, this.isReady ? 'Local OpenAI TTS initialized' : 'Local OpenAI TTS not configured');
            return true;
        } catch (error) {
            console.error('Local OpenAI TTS: Initialization error:', error);
            this.isReady = false;
            return false;
        }
    }

    /**
     * This module keeps no voice list; the voice is a free-text option.
     *
     * @returns {Promise<boolean>} Always true
     */
    async loadVoices() {
        this.voices = [];
        return true;
    }

    /**
     * No locale-specific selection is performed; delegates to
     * selectDefaultVoice().
     *
     * @returns {boolean} Always true
     */
    selectVoiceForLocale() {
        return this.selectDefaultVoice();
    }

    /**
     * Keep the current voice, falling back to 'alloy' when it is blank.
     *
     * @returns {boolean} Always true
     */
    selectDefaultVoice() {
        this.voiceOptions.voice = this.normalizeTextOption(this.voiceOptions.voice, 'alloy');
        return true;
    }

    /**
     * @returns {Array} Always an empty list
     */
    getAvailableVoices() {
        return [];
    }

    /**
     * @returns {Promise<Array>} Always an empty list
     */
    async getVoices() {
        return [];
    }

    /**
     * Request synthesized audio from `<apiBaseUrl>/audio/speech`.
     *
     * @param {string} text - Text to synthesize
     * @param {Object} [options]
     * @param {AbortSignal} [options.signal] - Passed to fetch for cancellation
     * @returns {Promise<Object>} `{ success: true, audioData: ArrayBuffer }`
     *   on success, otherwise `{ success: false, reason, error? }` with a
     *   reason of 'not_ready', 'empty_text', 'aborted' or 'api_error'.
     *   Errors are reported in the result, not thrown.
     */
    async generateSpeechAudio(text, options = {}) {
        if (!this.isReady || !this.apiBaseUrl) {
            return { success: false, reason: 'not_ready' };
        }

        try {
            const processedText = this.preprocessText(text);
            if (!processedText) {
                return { success: false, reason: 'empty_text' };
            }

            const payload = {
                model: this.normalizeTextOption(this.voiceOptions.model, 'tts-1'),
                input: processedText,
                voice: this.normalizeTextOption(this.voiceOptions.voice, 'alloy'),
                response_format: this.normalizeResponseFormat(this.voiceOptions.response_format),
                speed: this.getApiSpeed(this.voiceOptions.speed)
            };

            const headers = {
                'Content-Type': 'application/json'
            };
            // The key is optional: only send an Authorization header when
            // one has been set.
            if (this.apiKey) {
                headers.Authorization = `Bearer ${this.apiKey}`;
            }

            const response = await fetch(`${this.apiBaseUrl.replace(/\/+$/, '')}/audio/speech`, {
                method: 'POST',
                headers,
                body: JSON.stringify(payload),
                signal: options.signal
            });

            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`API error: ${response.status} ${response.statusText} - ${errorText}`);
            }

            // Read the body straight into an ArrayBuffer; no intermediate
            // Blob is needed.
            const arrayBuffer = await response.arrayBuffer();

            return {
                success: true,
                audioData: arrayBuffer
            };
        } catch (error) {
            if (error?.name === 'AbortError') {
                console.error('Local OpenAI TTS: Speech request was aborted:', error);
                return {
                    success: false,
                    reason: 'aborted',
                    error: error.message
                };
            }
            console.error('Local OpenAI TTS: Error generating speech:', error);
            return {
                success: false,
                reason: 'api_error',
                error: error.message
            };
        }
    }

    /**
     * Update voice options. Voice, model and response format are persisted
     * under provider-specific preference keys; speed is only kept in memory
     * here. Readiness is recomputed and announced afterwards.
     *
     * @param {Object} [options]
     * @param {string} [options.voice]
     * @param {number} [options.speed]
     * @param {string} [options.model]
     * @param {string} [options.response_format]
     */
    setVoiceOptions(options = {}) {
        const persistenceManager = this.getModule('persistence-manager');

        if (typeof options.voice === 'string') {
            this.voiceOptions.voice = this.normalizeTextOption(options.voice, this.voiceOptions.voice);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
            }
        }

        if (typeof options.speed === 'number') {
            this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
        }

        if (typeof options.model === 'string') {
            this.voiceOptions.model = this.normalizeTextOption(options.model, this.voiceOptions.model);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_model`, this.voiceOptions.model);
            }
        }

        if (typeof options.response_format === 'string') {
            this.voiceOptions.response_format = this.normalizeResponseFormat(options.response_format);
            if (persistenceManager) {
                persistenceManager.updatePreference('tts', `${this.id}_format`, this.voiceOptions.response_format);
            }
        }

        this.isReady = this.hasRequiredSettings();
        this.notifyReadyState();
    }

    /**
     * React to an API-key change event addressed to this provider.
     * Values that look like a URL are rejected; an empty key is accepted.
     *
     * @param {CustomEvent} event - `detail.provider` and `detail.key` are read
     */
    handleApiKeyChanged(event) {
        if (!event?.detail || event.detail.provider !== this.id) return;
        const newKey = event.detail.key || '';
        if (newKey && /^https?:\/\//i.test(newKey)) {
            console.error('Local OpenAI TTS: Received URL instead of API key, ignoring it');
            return;
        }

        const oldKey = this.apiKey;
        this.apiKey = newKey;

        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager && oldKey !== newKey) {
            persistenceManager.updatePreference('tts', `${this.id}_api_key`, newKey);
        }

        const wasReady = this.isReady;
        this.isReady = this.hasRequiredSettings();
        if (wasReady !== this.isReady) {
            this.notifyReadyState();
        }
    }

    /**
     * React to an API-URL change event addressed to this provider. The URL
     * is trimmed and stripped of trailing slashes. A missing, blank or
     * slash-only URL falls back to the default base URL.
     *
     * @param {CustomEvent} event - `detail.provider` and `detail.url` are read
     */
    handleApiUrlChanged(event) {
        if (!event?.detail || event.detail.provider !== this.id) return;
        const oldUrl = this.apiBaseUrl;

        // Normalise before deciding on the fallback, so that input such as
        // "   " or "/" cannot leave the module with an empty base URL.
        const tidyUrl = (value) => String(value || '').trim().replace(/\/+$/, '');
        const newUrl = tidyUrl(event.detail.url) || tidyUrl(this.getDefaultApiBaseUrl());
        this.apiBaseUrl = newUrl;

        const persistenceManager = this.getModule('persistence-manager');
        if (persistenceManager && oldUrl !== newUrl) {
            persistenceManager.updatePreference('tts', `${this.id}_api_url`, newUrl);
        }

        const wasReady = this.isReady;
        this.isReady = this.hasRequiredSettings();
        if (wasReady !== this.isReady || oldUrl !== newUrl) {
            this.notifyReadyState();
        }
    }

    /**
     * @param {*} value - Candidate option value
     * @param {string} fallback - Returned when the value is blank
     * @returns {string} Trimmed string form of the value, or the fallback
     */
    normalizeTextOption(value, fallback) {
        const text = String(value || '').trim();
        return text || fallback;
    }

    /**
     * @param {*} value - Requested audio format
     * @returns {string} Lower-cased format when it is one of the accepted
     *   formats, otherwise 'mp3'
     */
    normalizeResponseFormat(value) {
        const format = String(value || '').trim().toLowerCase();
        const validFormats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'];
        return validFormats.includes(format) ? format : 'mp3';
    }

    /**
     * Convert a speed value to the one sent in the request payload.
     *
     * @param {*} speed - Speed value; non-numeric input is normalized first
     * @returns {number} Speed clamped to [0.25, 4.0]
     */
    getApiSpeed(speed) {
        const numeric = Number(speed);
        const value = Number.isFinite(numeric) ? numeric : this.normalizeAppSpeed(speed);
        return Math.max(0.25, Math.min(4.0, value));
    }

    /**
     * @param {*} speed - Requested speed; non-finite or non-numeric values count as 1.0
     * @returns {number} Speed clamped to [0.5, 2.0]
     */
    normalizeAppSpeed(speed) {
        const numeric = Number(speed);
        const value = Number.isFinite(numeric) ? numeric : 1.0;
        return Math.max(0.5, Math.min(2.0, value));
    }
}
|
||||
|
||||
// Single shared provider instance: exported for ES-module consumers,
// registered with the global module registry when one is present, and
// exposed on window under the class name for global lookups.
export const localOpenAITTSModule = new LocalOpenAITTSModule();

if (window.moduleRegistry) {
    window.moduleRegistry.register(localOpenAITTSModule);
}

window.LocalOpenAITTSModule = localOpenAITTSModule;
|
||||
@@ -19,6 +19,8 @@ class MarkupParserModule extends BaseModule {
|
||||
'parseParagraph',
|
||||
'parseInline',
|
||||
'extractGlossaryTags',
|
||||
'extractTtsInstructionTags',
|
||||
'normalizeTtsInstructionProvider',
|
||||
'parseImageOptions',
|
||||
'parseSfxOptions',
|
||||
'parseMusicOptions',
|
||||
@@ -243,6 +245,52 @@ class MarkupParserModule extends BaseModule {
|
||||
.sort((a, b) => b.term.length - a.term.length);
|
||||
}
|
||||
|
||||
extractTtsInstructionTags(tags = []) {
|
||||
if (!Array.isArray(tags)) return [];
|
||||
|
||||
return tags
|
||||
.map(tag => {
|
||||
const key = String(tag?.key || '').toLowerCase();
|
||||
const value = String(tag?.value || '').trim();
|
||||
const param = String(tag?.param || '').trim();
|
||||
|
||||
if (key === 'tts') {
|
||||
if (param) {
|
||||
return {
|
||||
provider: this.normalizeTtsInstructionProvider(value),
|
||||
instruction: param
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
provider: null,
|
||||
instruction: value
|
||||
};
|
||||
}
|
||||
|
||||
if (key.startsWith('tts-') && value) {
|
||||
return {
|
||||
provider: this.normalizeTtsInstructionProvider(key.slice(4)),
|
||||
instruction: value
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
})
|
||||
.filter(entry => entry && entry.instruction);
|
||||
}
|
||||
|
||||
normalizeTtsInstructionProvider(provider) {
|
||||
const normalized = String(provider || '').trim().toLowerCase();
|
||||
if (!normalized) return null;
|
||||
if (normalized === 'openai' || normalized === 'openai-tts') return 'openai-tts';
|
||||
if (normalized === 'local-openai' || normalized === 'local-openai-tts') return 'local-openai-tts';
|
||||
if (normalized === 'elevenlabs' || normalized === 'elevenlabs-tts') return 'elevenlabs-tts';
|
||||
if (normalized === 'kokoro' || normalized === 'kokoro-tts') return 'kokoro-tts';
|
||||
if (normalized === 'browser' || normalized === 'browser-tts') return 'browser-tts';
|
||||
return normalized;
|
||||
}
|
||||
|
||||
smartypants(text) {
|
||||
const result = String(text)
|
||||
.replace(/---/g, '\u2014')
|
||||
|
||||
+106
-18
@@ -8,7 +8,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
constructor() {
|
||||
super('openai-tts', 'OpenAI TTS');
|
||||
|
||||
this.supportedVoices = [
|
||||
this.supportedModels = [
|
||||
{ id: 'tts-1', name: 'TTS-1' },
|
||||
{ id: 'tts-1-hd', name: 'TTS-1 HD' },
|
||||
{ id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
|
||||
];
|
||||
|
||||
this.legacyVoices = [
|
||||
{ id: 'alloy', name: 'Alloy', language: 'en' },
|
||||
{ id: 'ash', name: 'Ash', language: 'en' },
|
||||
{ id: 'coral', name: 'Coral', language: 'en' },
|
||||
@@ -19,6 +25,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
{ id: 'sage', name: 'Sage', language: 'en' },
|
||||
{ id: 'shimmer', name: 'Shimmer', language: 'en' }
|
||||
];
|
||||
|
||||
this.gpt4oMiniVoices = [
|
||||
{ id: 'alloy', name: 'Alloy', language: 'en' },
|
||||
{ id: 'ash', name: 'Ash', language: 'en' },
|
||||
{ id: 'ballad', name: 'Ballad', language: 'en' },
|
||||
{ id: 'coral', name: 'Coral', language: 'en' },
|
||||
{ id: 'echo', name: 'Echo', language: 'en' },
|
||||
{ id: 'fable', name: 'Fable', language: 'en' },
|
||||
{ id: 'nova', name: 'Nova', language: 'en' },
|
||||
{ id: 'onyx', name: 'Onyx', language: 'en' },
|
||||
{ id: 'sage', name: 'Sage', language: 'en' },
|
||||
{ id: 'shimmer', name: 'Shimmer', language: 'en' },
|
||||
{ id: 'verse', name: 'Verse', language: 'en' },
|
||||
{ id: 'marin', name: 'Marin', language: 'en' },
|
||||
{ id: 'cedar', name: 'Cedar', language: 'en' }
|
||||
];
|
||||
|
||||
this.supportedVoices = [...this.gpt4oMiniVoices];
|
||||
this.supportsTtsInstructions = true;
|
||||
|
||||
// Voice options specific to OpenAI
|
||||
this.voiceOptions = {
|
||||
@@ -62,15 +87,6 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
return false;
|
||||
}
|
||||
|
||||
// API key is already loaded in parent initialize() method
|
||||
// Just check if it's available
|
||||
if (!this.apiKey) {
|
||||
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
|
||||
this.isReady = false;
|
||||
this.reportProgress(100, 'OpenAI TTS not configured');
|
||||
return true;
|
||||
}
|
||||
|
||||
// Load preferences
|
||||
const preferredVoice = persistenceManager.getPreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
|
||||
if (preferredVoice) {
|
||||
@@ -79,12 +95,25 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
|
||||
const preferredModel = persistenceManager.getPreference('tts', `${this.id}_model`, this.voiceOptions.model);
|
||||
if (preferredModel) {
|
||||
this.voiceOptions.model = preferredModel;
|
||||
this.voiceOptions.model = this.normalizeModelId(preferredModel);
|
||||
}
|
||||
|
||||
this.voices = this.getAvailableVoices();
|
||||
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
|
||||
|
||||
const preferredSpeed = persistenceManager.getPreference('tts', 'speed', this.voiceOptions.speed);
|
||||
if (typeof preferredSpeed === 'number') {
|
||||
this.voiceOptions.speed = this.getApiSpeed(preferredSpeed);
|
||||
this.voiceOptions.speed = this.normalizeAppSpeed(preferredSpeed);
|
||||
}
|
||||
|
||||
// API key is already loaded in parent initialize() method.
|
||||
// Model and voice preferences still need to be available for the
|
||||
// options UI even before credentials are configured.
|
||||
if (!this.apiKey) {
|
||||
console.info('OpenAI TTS: API key not configured; provider unavailable until configured');
|
||||
this.isReady = false;
|
||||
this.reportProgress(100, 'OpenAI TTS not configured');
|
||||
return true;
|
||||
}
|
||||
|
||||
const apiReachable = await this.loadVoices();
|
||||
@@ -164,9 +193,13 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
* @returns {Array} - Array of voice objects
|
||||
*/
|
||||
getAvailableVoices() {
|
||||
this.voices = [...this.supportedVoices];
|
||||
this.voices = this.getVoicesForModel(this.voiceOptions.model);
|
||||
return this.voices;
|
||||
}
|
||||
|
||||
async getVoices() {
|
||||
return this.getAvailableVoices();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate speech audio data using OpenAI API
|
||||
@@ -190,6 +223,11 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
response_format: this.voiceOptions.response_format || 'mp3',
|
||||
speed: this.getApiSpeed(this.voiceOptions.speed)
|
||||
};
|
||||
|
||||
const instructions = this.getRequestInstructions(options);
|
||||
if (instructions) {
|
||||
payload.instructions = instructions;
|
||||
}
|
||||
|
||||
// Make API request
|
||||
const response = await fetch(`${this.apiBaseUrl}/audio/speech`, {
|
||||
@@ -246,17 +284,20 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
if (typeof options.speed === 'number') {
|
||||
// OpenAI speech speed uses 1.0 as normal. The app-wide slider also
|
||||
// uses 1.0 as normal, so only clamp at the provider API boundary.
|
||||
this.voiceOptions.speed = this.getApiSpeed(options.speed);
|
||||
this.voiceOptions.speed = this.normalizeAppSpeed(options.speed);
|
||||
}
|
||||
|
||||
// Handle OpenAI-specific options
|
||||
if (options.model) {
|
||||
this.voiceOptions.model = options.model;
|
||||
this.voiceOptions.model = this.normalizeModelId(options.model);
|
||||
this.voices = this.getAvailableVoices();
|
||||
this.voiceOptions.voice = this.normalizeVoiceId(this.voiceOptions.voice);
|
||||
|
||||
// Save the model preference
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager) {
|
||||
persistenceManager.updatePreference('tts', `${this.id}_model`, options.model);
|
||||
persistenceManager.updatePreference('tts', `${this.id}_model`, this.voiceOptions.model);
|
||||
persistenceManager.updatePreference('tts', `${this.id}_voice`, this.voiceOptions.voice);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -283,7 +324,7 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
|
||||
normalizeVoiceId(voice) {
|
||||
const voiceId = this.getVoiceId(voice).toLowerCase();
|
||||
const supported = new Set(this.supportedVoices.map(item => item.id));
|
||||
const supported = new Set(this.getVoicesForModel(this.voiceOptions.model).map(item => item.id));
|
||||
|
||||
if (supported.has(voiceId)) {
|
||||
return voiceId;
|
||||
@@ -296,10 +337,57 @@ export class OpenAITTSModule extends ApiTTSModuleBase {
|
||||
return 'alloy';
|
||||
}
|
||||
|
||||
normalizeModelId(model) {
|
||||
const modelId = String(model || '').trim();
|
||||
const supported = new Set(this.supportedModels.map(item => item.id));
|
||||
if (supported.has(modelId)) {
|
||||
return modelId;
|
||||
}
|
||||
|
||||
if (modelId) {
|
||||
console.warn(`OpenAI TTS: Unsupported model "${modelId}", falling back to tts-1-hd`);
|
||||
}
|
||||
|
||||
return 'tts-1-hd';
|
||||
}
|
||||
|
||||
getVoicesForModel(model) {
|
||||
const modelId = this.normalizeModelId(model || this.voiceOptions.model);
|
||||
if (modelId === 'gpt-4o-mini-tts') {
|
||||
return [...this.gpt4oMiniVoices];
|
||||
}
|
||||
|
||||
return [...this.legacyVoices];
|
||||
}
|
||||
|
||||
getRequestInstructions(options = {}) {
|
||||
if (this.normalizeModelId(this.voiceOptions.model) !== 'gpt-4o-mini-tts') {
|
||||
return '';
|
||||
}
|
||||
|
||||
const instructions = Array.isArray(options.ttsInstructions)
|
||||
? options.ttsInstructions
|
||||
: [];
|
||||
const matching = instructions
|
||||
.filter(entry => {
|
||||
const provider = String(entry?.provider || '').trim();
|
||||
return !provider || provider === this.id;
|
||||
})
|
||||
.map(entry => String(entry?.instruction || '').trim())
|
||||
.filter(Boolean);
|
||||
|
||||
return matching.length > 0 ? matching[matching.length - 1] : '';
|
||||
}
|
||||
|
||||
getApiSpeed(speed) {
|
||||
const value = Number.isFinite(speed) ? speed : 1.0;
|
||||
const value = Number.isFinite(Number(speed)) ? Number(speed) : this.normalizeAppSpeed(speed);
|
||||
return Math.max(0.25, Math.min(4.0, value));
|
||||
}
|
||||
|
||||
normalizeAppSpeed(speed) {
|
||||
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1.0;
|
||||
return Math.max(0.5, Math.min(2.0, value));
|
||||
}
|
||||
}
|
||||
|
||||
const openAITTSModule = new OpenAITTSModule();
|
||||
|
||||
@@ -37,6 +37,8 @@ class OptionsUIModule extends BaseModule {
|
||||
'createModal',
|
||||
'populateTtsSystems',
|
||||
'populateVoices',
|
||||
'ensureSelectedVoiceIsAvailable',
|
||||
'updateVoiceControlVisibility',
|
||||
'populateLanguages',
|
||||
'loadPreferences',
|
||||
'createVolumeControl',
|
||||
@@ -233,10 +235,10 @@ class OptionsUIModule extends BaseModule {
|
||||
this.elements.ttsSpeed = createUIElement('input', {
|
||||
type: 'range',
|
||||
min: 50,
|
||||
max: 150,
|
||||
max: 200,
|
||||
value: 100,
|
||||
'data-pref-bind': 'tts.speed',
|
||||
'data-pref-transform': 'centered-speed'
|
||||
'data-pref-transform': 'multiplier-percent'
|
||||
}, null, speedContainer);
|
||||
|
||||
// Update displayed value when slider changes
|
||||
@@ -301,6 +303,14 @@ class OptionsUIModule extends BaseModule {
|
||||
this.elements.ttsVoice = createUIElement('select', {
|
||||
'data-pref-bind': 'tts.voice'
|
||||
}, null, ttsVoiceContainer);
|
||||
|
||||
this.elements.localOpenAiVoice = createUIElement('input', {
|
||||
id: 'local-openai-voice',
|
||||
type: 'text',
|
||||
placeholder: 'alloy',
|
||||
'data-pref-bind': 'tts.local-openai-tts_voice'
|
||||
}, null, ttsVoiceContainer);
|
||||
this.elements.localOpenAiVoice.style.display = 'none';
|
||||
|
||||
ttsSection.appendChild(ttsVoiceContainer);
|
||||
|
||||
@@ -503,10 +513,108 @@ class OptionsUIModule extends BaseModule {
|
||||
}, null, openaiApiUrlContainer);
|
||||
|
||||
openaiSettings.appendChild(openaiApiUrlContainer);
|
||||
|
||||
const openaiModelContainer = document.createElement('div');
|
||||
openaiModelContainer.className = 'option-item';
|
||||
|
||||
const openaiModelLabel = document.createElement('label');
|
||||
openaiModelLabel.textContent = this.t('options.model') + ':';
|
||||
openaiModelContainer.appendChild(openaiModelLabel);
|
||||
|
||||
this.elements.openaiModel = createUIElement('select', {
|
||||
id: 'openai-model',
|
||||
'data-pref-bind': 'tts.openai-tts_model'
|
||||
}, null, openaiModelContainer);
|
||||
|
||||
[
|
||||
{ id: 'tts-1', name: 'TTS-1' },
|
||||
{ id: 'tts-1-hd', name: 'TTS-1 HD' },
|
||||
{ id: 'gpt-4o-mini-tts', name: 'GPT-4o mini TTS' }
|
||||
].forEach(model => {
|
||||
const option = document.createElement('option');
|
||||
option.value = model.id;
|
||||
option.textContent = model.name;
|
||||
this.elements.openaiModel.appendChild(option);
|
||||
});
|
||||
|
||||
openaiSettings.appendChild(openaiModelContainer);
|
||||
|
||||
// Local OpenAI-compatible API settings
|
||||
const localOpenAiSettings = document.createElement('div');
|
||||
localOpenAiSettings.className = 'api-settings local-openai-tts-settings';
|
||||
localOpenAiSettings.style.display = 'none';
|
||||
|
||||
const localOpenAiTitle = document.createElement('h3');
|
||||
localOpenAiTitle.textContent = this.t('options.localOpenAiSettings');
|
||||
localOpenAiSettings.appendChild(localOpenAiTitle);
|
||||
|
||||
const localOpenAiApiKeyContainer = document.createElement('div');
|
||||
localOpenAiApiKeyContainer.className = 'option-item';
|
||||
|
||||
const localOpenAiApiKeyLabel = document.createElement('label');
|
||||
localOpenAiApiKeyLabel.textContent = this.t('options.optionalApiKey') + ':';
|
||||
localOpenAiApiKeyContainer.appendChild(localOpenAiApiKeyLabel);
|
||||
|
||||
this.elements.localOpenAiApiKey = createUIElement('input', {
|
||||
type: 'password',
|
||||
'data-pref-bind': 'tts.local-openai-tts_api_key'
|
||||
}, null, localOpenAiApiKeyContainer);
|
||||
|
||||
localOpenAiSettings.appendChild(localOpenAiApiKeyContainer);
|
||||
|
||||
const localOpenAiApiUrlContainer = document.createElement('div');
|
||||
localOpenAiApiUrlContainer.className = 'option-item';
|
||||
|
||||
const localOpenAiApiUrlLabel = document.createElement('label');
|
||||
localOpenAiApiUrlLabel.textContent = this.t('options.apiUrl') + ':';
|
||||
localOpenAiApiUrlContainer.appendChild(localOpenAiApiUrlLabel);
|
||||
|
||||
this.elements.localOpenAiApiUrl = createUIElement('input', {
|
||||
type: 'text',
|
||||
'data-pref-bind': 'tts.local-openai-tts_api_url'
|
||||
}, null, localOpenAiApiUrlContainer);
|
||||
|
||||
localOpenAiSettings.appendChild(localOpenAiApiUrlContainer);
|
||||
|
||||
const localOpenAiModelContainer = document.createElement('div');
|
||||
localOpenAiModelContainer.className = 'option-item';
|
||||
|
||||
const localOpenAiModelLabel = document.createElement('label');
|
||||
localOpenAiModelLabel.textContent = this.t('options.model') + ':';
|
||||
localOpenAiModelContainer.appendChild(localOpenAiModelLabel);
|
||||
|
||||
this.elements.localOpenAiModel = createUIElement('input', {
|
||||
id: 'local-openai-model',
|
||||
type: 'text',
|
||||
placeholder: 'tts-1',
|
||||
'data-pref-bind': 'tts.local-openai-tts_model'
|
||||
}, null, localOpenAiModelContainer);
|
||||
|
||||
localOpenAiSettings.appendChild(localOpenAiModelContainer);
|
||||
|
||||
const localOpenAiTimeoutContainer = document.createElement('div');
|
||||
localOpenAiTimeoutContainer.className = 'option-item';
|
||||
|
||||
const localOpenAiTimeoutLabel = document.createElement('label');
|
||||
localOpenAiTimeoutLabel.textContent = this.t('options.requestTimeoutMs') + ':';
|
||||
localOpenAiTimeoutContainer.appendChild(localOpenAiTimeoutLabel);
|
||||
|
||||
this.elements.localOpenAiTimeout = createUIElement('input', {
|
||||
id: 'local-openai-timeout-ms',
|
||||
type: 'number',
|
||||
min: 1000,
|
||||
max: 600000,
|
||||
step: 1000,
|
||||
'data-pref-bind': 'tts.local-openai-tts_timeout_ms',
|
||||
'data-pref-transform': 'integer:1000,600000'
|
||||
}, null, localOpenAiTimeoutContainer);
|
||||
|
||||
localOpenAiSettings.appendChild(localOpenAiTimeoutContainer);
|
||||
|
||||
// Add all API settings to container
|
||||
apiSettings.appendChild(elevenLabsSettings);
|
||||
apiSettings.appendChild(openaiSettings);
|
||||
apiSettings.appendChild(localOpenAiSettings);
|
||||
|
||||
return apiSettings;
|
||||
}
|
||||
@@ -622,6 +730,15 @@ class OptionsUIModule extends BaseModule {
|
||||
if (!ttsFactory || !this.elements.ttsVoice) return;
|
||||
|
||||
const selectedHandler = this.elements.ttsSystem?.value || this.getPreference('tts', 'preferred_handler', 'none');
|
||||
this.updateVoiceControlVisibility(selectedHandler);
|
||||
|
||||
if (selectedHandler === 'local-openai-tts') {
|
||||
if (this.elements.localOpenAiVoice) {
|
||||
this.elements.localOpenAiVoice.value = this.getPreference('tts', 'local-openai-tts_voice', 'alloy');
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const voices = typeof ttsFactory.getVoicesForHandler === 'function'
|
||||
? await ttsFactory.getVoicesForHandler(selectedHandler) || []
|
||||
: await ttsFactory.getVoices() || [];
|
||||
@@ -635,6 +752,34 @@ class OptionsUIModule extends BaseModule {
|
||||
'name',
|
||||
this.getPreference('tts', `${selectedHandler}_voice`, this.getPreference('tts', 'voice', ''))
|
||||
);
|
||||
|
||||
this.ensureSelectedVoiceIsAvailable(selectedHandler, voices);
|
||||
}
|
||||
|
||||
ensureSelectedVoiceIsAvailable(selectedHandler, voices = []) {
|
||||
if (!this.elements.ttsVoice || selectedHandler === 'local-openai-tts') return;
|
||||
if (!Array.isArray(voices) || voices.length === 0) return;
|
||||
|
||||
const available = new Set(voices.map(voice => String(voice.id || '').toLowerCase()));
|
||||
const current = String(this.elements.ttsVoice.value || '').toLowerCase();
|
||||
if (current && available.has(current)) return;
|
||||
|
||||
const fallback = voices.some(voice => voice.id === 'alloy') ? 'alloy' : voices[0].id;
|
||||
this.elements.ttsVoice.value = fallback;
|
||||
this.updatePreference('tts', 'voice', fallback);
|
||||
if (selectedHandler && selectedHandler !== 'none') {
|
||||
this.updatePreference('tts', `${selectedHandler}_voice`, fallback);
|
||||
}
|
||||
}
|
||||
|
||||
updateVoiceControlVisibility(selectedHandler) {
|
||||
const useTextVoice = selectedHandler === 'local-openai-tts';
|
||||
if (this.elements.ttsVoice) {
|
||||
this.elements.ttsVoice.style.display = useTextVoice ? 'none' : '';
|
||||
}
|
||||
if (this.elements.localOpenAiVoice) {
|
||||
this.elements.localOpenAiVoice.style.display = useTextVoice ? '' : 'none';
|
||||
}
|
||||
}
|
||||
|
||||
renderProviderStatuses() {
|
||||
@@ -698,6 +843,7 @@ class OptionsUIModule extends BaseModule {
|
||||
// Update API settings visibility based on current TTS system
|
||||
if (this.elements.ttsSystem) {
|
||||
this.updateApiSettingsVisibility(this.elements.ttsSystem.value);
|
||||
this.updateVoiceControlVisibility(this.elements.ttsSystem.value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -753,6 +899,36 @@ class OptionsUIModule extends BaseModule {
|
||||
if (!this.getPreference('tts', 'openai-tts_api_key')) {
|
||||
this.updatePreference('tts', 'openai-tts_api_key', '');
|
||||
}
|
||||
|
||||
if (!this.getPreference('tts', 'openai-tts_model')) {
|
||||
this.updatePreference('tts', 'openai-tts_model', 'tts-1-hd');
|
||||
}
|
||||
|
||||
if (this.elements.localOpenAiApiUrl) {
|
||||
const savedUrl = this.getPreference('tts', 'local-openai-tts_api_url');
|
||||
const defaultUrl = 'http://localhost:8000/v1';
|
||||
|
||||
if (!savedUrl) {
|
||||
console.log('Options UI: Setting default local OpenAI-compatible API URL:', defaultUrl);
|
||||
this.updatePreference('tts', 'local-openai-tts_api_url', defaultUrl);
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.getPreference('tts', 'local-openai-tts_api_key')) {
|
||||
this.updatePreference('tts', 'local-openai-tts_api_key', '');
|
||||
}
|
||||
|
||||
if (!this.getPreference('tts', 'local-openai-tts_voice')) {
|
||||
this.updatePreference('tts', 'local-openai-tts_voice', 'alloy');
|
||||
}
|
||||
|
||||
if (!this.getPreference('tts', 'local-openai-tts_model')) {
|
||||
this.updatePreference('tts', 'local-openai-tts_model', 'tts-1');
|
||||
}
|
||||
|
||||
if (!this.getPreference('tts', 'local-openai-tts_timeout_ms')) {
|
||||
this.updatePreference('tts', 'local-openai-tts_timeout_ms', 60000);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -895,6 +1071,7 @@ class OptionsUIModule extends BaseModule {
|
||||
this.renderProviderStatuses();
|
||||
});
|
||||
this.updateApiSettingsVisibility(value);
|
||||
this.updateVoiceControlVisibility(value);
|
||||
} else if (key === 'voice') {
|
||||
ttsFactory.configure({ voice: value });
|
||||
} else if (key === 'speed') {
|
||||
@@ -919,6 +1096,24 @@ class OptionsUIModule extends BaseModule {
|
||||
const provider = key.replace('_api_url', '');
|
||||
this.dispatchApiChangeEvent('api:urlChanged', provider, 'url', value);
|
||||
ttsFactory.refreshHandlerStatus(provider).then(() => this.renderProviderStatuses());
|
||||
} else if (key.endsWith('_voice')) {
|
||||
const provider = key.replace('_voice', '');
|
||||
const handler = typeof ttsFactory.getHandler === 'function' ? ttsFactory.getHandler(provider) : null;
|
||||
if (handler && typeof handler.setVoiceOptions === 'function') {
|
||||
handler.setVoiceOptions({ voice: value });
|
||||
}
|
||||
if (ttsFactory.activeHandler === provider) {
|
||||
ttsFactory.voice = value;
|
||||
}
|
||||
} else if (key.endsWith('_model')) {
|
||||
const provider = key.replace('_model', '');
|
||||
const handler = typeof ttsFactory.getHandler === 'function' ? ttsFactory.getHandler(provider) : null;
|
||||
if (handler && typeof handler.setVoiceOptions === 'function') {
|
||||
handler.setVoiceOptions({ model: value });
|
||||
}
|
||||
if (provider === 'openai-tts') {
|
||||
this.populateVoices();
|
||||
}
|
||||
}
|
||||
if (key === 'speed' && this.elements.ttsSpeed) {
|
||||
this.updateSpeedDisplay();
|
||||
|
||||
@@ -35,10 +35,20 @@ class PersistenceManagerModule extends BaseModule {
|
||||
speed: 1.0,
|
||||
language: 'en_US',
|
||||
voice: '',
|
||||
'browser-tts_timeout_ms': 60000,
|
||||
'kokoro-tts_timeout_ms': 60000,
|
||||
'elevenlabs-tts_api_key': '',
|
||||
'elevenlabs-tts_api_url': 'https://api.elevenlabs.io/v1',
|
||||
'elevenlabs-tts_timeout_ms': 60000,
|
||||
'openai-tts_api_key': '',
|
||||
'openai-tts_api_url': 'https://api.openai.com/v1'
|
||||
'openai-tts_api_url': 'https://api.openai.com/v1',
|
||||
'openai-tts_model': 'tts-1-hd',
|
||||
'openai-tts_timeout_ms': 60000,
|
||||
'local-openai-tts_api_key': '',
|
||||
'local-openai-tts_api_url': 'http://localhost:8000/v1',
|
||||
'local-openai-tts_voice': 'alloy',
|
||||
'local-openai-tts_model': 'tts-1',
|
||||
'local-openai-tts_timeout_ms': 60000
|
||||
},
|
||||
audio: {
|
||||
masterVolume: 1.0,
|
||||
@@ -629,13 +639,39 @@ class PersistenceManagerModule extends BaseModule {
|
||||
// Check if it's a range transformer in format 'range:min,max'
|
||||
if (element.dataset.prefTransform === 'centered-speed') {
|
||||
transformer = {
|
||||
toElement: (value) => Math.round(((Number(value) || 1) * 50) + 50),
|
||||
toPreference: (value) => Math.max(0.5, Math.min(2.0, (parseInt(value, 10) - 50) / 50))
|
||||
toElement: (value) => Math.round(Math.max(0.5, Math.min(2.0, Number(value) || 1)) * 100),
|
||||
toPreference: (value) => {
|
||||
const percent = parseInt(value, 10);
|
||||
return Math.max(0.5, Math.min(2.0, (Number.isFinite(percent) ? percent : 100) / 100));
|
||||
}
|
||||
};
|
||||
} else if (element.dataset.prefTransform === 'multiplier-percent') {
|
||||
transformer = {
|
||||
toElement: (value) => Math.round((Number(value) || 1) * 100),
|
||||
toPreference: (value) => Math.max(0.25, Math.min(4.0, parseInt(value, 10) / 100))
|
||||
toElement: (value) => Math.round(Math.max(0.5, Math.min(2.0, Number(value) || 1)) * 100),
|
||||
toPreference: (value) => {
|
||||
const percent = parseInt(value, 10);
|
||||
return Math.max(0.5, Math.min(2.0, (Number.isFinite(percent) ? percent : 100) / 100));
|
||||
}
|
||||
};
|
||||
} else if (element.dataset.prefTransform.startsWith('integer:')) {
|
||||
const rangeValues = element.dataset.prefTransform.substring(8).split(',');
|
||||
const min = Number.parseInt(rangeValues[0], 10);
|
||||
const max = Number.parseInt(rangeValues[1], 10);
|
||||
transformer = {
|
||||
toElement: (value) => Number.parseInt(value, 10),
|
||||
toPreference: (value) => {
|
||||
const parsed = Number.parseInt(value, 10);
|
||||
if (!Number.isFinite(parsed)) {
|
||||
return Number.isFinite(min) ? min : 0;
|
||||
}
|
||||
if (Number.isFinite(min) && parsed < min) {
|
||||
return min;
|
||||
}
|
||||
if (Number.isFinite(max) && parsed > max) {
|
||||
return max;
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
};
|
||||
} else if (element.dataset.prefTransform.startsWith('range:')) {
|
||||
const rangeValues = element.dataset.prefTransform.substring(6).split(',');
|
||||
|
||||
@@ -45,6 +45,8 @@ class SentenceQueueModule extends BaseModule {
|
||||
'prepareSpeechMetadata',
|
||||
'preloadAssetsForItem',
|
||||
'normalizeTtsText',
|
||||
'getConfiguredTtsGenerationTimeoutMs',
|
||||
'normalizeTtsGenerationTimeoutMs',
|
||||
'runTtsPreloadWithTimeout',
|
||||
'cancelBlockingGeneration',
|
||||
'cancelGenerationRequests',
|
||||
@@ -89,19 +91,25 @@ class SentenceQueueModule extends BaseModule {
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (persistenceManager && typeof persistenceManager.getPreference === 'function') {
|
||||
this.autoplay = persistenceManager.getPreference('app', 'autoplay', true) !== false;
|
||||
this.ttsGenerationTimeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
|
||||
}
|
||||
this.addEventListener(document, 'preference-updated', (event) => {
|
||||
const { category, key, value } = event.detail || {};
|
||||
if (category === 'app' && key === 'autoplay') {
|
||||
this.autoplay = value !== false;
|
||||
}
|
||||
if (category === 'tts' && (key === 'preferred_handler' || key.endsWith('_timeout_ms'))) {
|
||||
this.ttsGenerationTimeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
|
||||
}
|
||||
});
|
||||
this.addEventListener(document, 'story:input-mode', (event) => {
|
||||
this.inputMode = ['text', 'choice', 'end'].includes(event.detail) ? event.detail : 'text';
|
||||
});
|
||||
this.addEventListener(document, 'ui:command', (event) => {
|
||||
if (event.detail?.type === 'continue') {
|
||||
this.lastContinueAt = performance.now();
|
||||
if (event.detail?.source !== 'display-clear') {
|
||||
this.lastContinueAt = performance.now();
|
||||
}
|
||||
this.cancelBlockingGeneration('user-fast-forward', {
|
||||
minWaitMs: USER_CANCEL_BLOCKING_WAIT_MIN_MS
|
||||
});
|
||||
@@ -305,11 +313,35 @@ class SentenceQueueModule extends BaseModule {
|
||||
.trim();
|
||||
}
|
||||
|
||||
getConfiguredTtsGenerationTimeoutMs() {
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
if (!persistenceManager || typeof persistenceManager.getPreference !== 'function') {
|
||||
return TTS_GENERATION_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
const preferredHandler = persistenceManager.getPreference('tts', 'preferred_handler', 'none');
|
||||
const providerTimeout = preferredHandler && preferredHandler !== 'none'
|
||||
? persistenceManager.getPreference('tts', `${preferredHandler}_timeout_ms`)
|
||||
: undefined;
|
||||
const genericTimeout = persistenceManager.getPreference('tts', 'generation_timeout_ms');
|
||||
|
||||
return this.normalizeTtsGenerationTimeoutMs(providerTimeout ?? genericTimeout ?? TTS_GENERATION_TIMEOUT_MS);
|
||||
}
|
||||
|
||||
normalizeTtsGenerationTimeoutMs(value) {
|
||||
const timeout = Number(value);
|
||||
if (!Number.isFinite(timeout)) {
|
||||
return TTS_GENERATION_TIMEOUT_MS;
|
||||
}
|
||||
return Math.max(1000, Math.min(600000, Math.round(timeout)));
|
||||
}
|
||||
|
||||
runTtsPreloadWithTimeout(ttsFactory, text, context = {}) {
|
||||
const sentenceId = context.sentenceId || context.id || `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const requestId = `${sentenceId}:${context.prefetch ? 'prefetch' : 'blocking'}:${Date.now()}`;
|
||||
const controller = new AbortController();
|
||||
const startedAt = performance.now();
|
||||
const timeoutMs = this.getConfiguredTtsGenerationTimeoutMs();
|
||||
|
||||
return new Promise((resolve) => {
|
||||
let settled = false;
|
||||
@@ -324,12 +356,12 @@ class SentenceQueueModule extends BaseModule {
|
||||
const timeoutId = setTimeout(() => {
|
||||
console.warn('SentenceQueue: TTS generation timed out; continuing without audio', {
|
||||
sentenceId,
|
||||
timeoutMs: this.ttsGenerationTimeoutMs,
|
||||
timeoutMs,
|
||||
textPreview: text.slice(0, 120)
|
||||
});
|
||||
controller.abort('tts-generation-timeout');
|
||||
finish({ success: false, reason: 'tts_generation_timeout', timedOut: true });
|
||||
}, this.ttsGenerationTimeoutMs);
|
||||
}, timeoutMs);
|
||||
|
||||
this.generationRequests.set(requestId, {
|
||||
controller,
|
||||
@@ -340,7 +372,10 @@ class SentenceQueueModule extends BaseModule {
|
||||
finish
|
||||
});
|
||||
|
||||
Promise.resolve(ttsFactory.preloadSpeech(text, { signal: controller.signal }))
|
||||
Promise.resolve(ttsFactory.preloadSpeech(text, {
|
||||
signal: controller.signal,
|
||||
ttsInstructions: Array.isArray(context.ttsInstructions) ? context.ttsInstructions : []
|
||||
}))
|
||||
.then(result => finish(result || { success: false, reason: 'empty_tts_result' }))
|
||||
.catch(error => {
|
||||
if (controller.signal.aborted) {
|
||||
@@ -426,7 +461,10 @@ class SentenceQueueModule extends BaseModule {
|
||||
let speedMultiplier = 1.0;
|
||||
const ttsFactory = this.getModule('tts-factory');
|
||||
if (ttsFactory) {
|
||||
speedMultiplier = Number.isFinite(ttsFactory.speed) ? Math.max(0.25, ttsFactory.speed) : 1.0;
|
||||
const configuredSpeed = Number(ttsFactory.speed);
|
||||
speedMultiplier = Number.isFinite(configuredSpeed)
|
||||
? Math.max(0.5, Math.min(2.0, configuredSpeed))
|
||||
: 1.0;
|
||||
}
|
||||
|
||||
// Calculate estimated duration in milliseconds
|
||||
@@ -486,6 +524,7 @@ class SentenceQueueModule extends BaseModule {
|
||||
sentenceId: id,
|
||||
blockId: metadata.blockId ?? null,
|
||||
turnId: metadata.turnId ?? null,
|
||||
ttsInstructions: Array.isArray(metadata.ttsInstructions) ? metadata.ttsInstructions : [],
|
||||
blocking: true
|
||||
});
|
||||
|
||||
@@ -501,6 +540,7 @@ class SentenceQueueModule extends BaseModule {
|
||||
paragraphIndex: metadata.paragraphIndex ?? null,
|
||||
layoutText: metadata.layoutText || text,
|
||||
glossaryEntries: Array.isArray(metadata.glossaryEntries) ? metadata.glossaryEntries : [],
|
||||
ttsInstructions: Array.isArray(metadata.ttsInstructions) ? metadata.ttsInstructions : [],
|
||||
isFirstParagraphInChapter: Boolean(metadata.isFirstParagraphInChapter),
|
||||
role: metadata.role || (metadata.type === 'heading' ? 'chapter-heading' : 'body'),
|
||||
dropCap: Boolean(metadata.dropCap),
|
||||
@@ -753,9 +793,6 @@ class SentenceQueueModule extends BaseModule {
|
||||
if (this.lastContinueAt >= (sentence.playbackStartedAt || 0)) {
|
||||
return false;
|
||||
}
|
||||
if (this.inputMode === 'choice') {
|
||||
return false;
|
||||
}
|
||||
return this.sentenceQueue.length > 1;
|
||||
}
|
||||
|
||||
@@ -848,6 +885,7 @@ class SentenceQueueModule extends BaseModule {
|
||||
sentenceId: nextItem.id,
|
||||
blockId: nextItem.blockId ?? null,
|
||||
turnId: nextItem.turnId ?? null,
|
||||
ttsInstructions: Array.isArray(nextItem.ttsInstructions) ? nextItem.ttsInstructions : [],
|
||||
queueIndex: index,
|
||||
prefetch: true,
|
||||
blocking: false
|
||||
|
||||
@@ -291,12 +291,13 @@ class SocketClientModule extends BaseModule {
|
||||
}
|
||||
}
|
||||
|
||||
await this.storeAndQueueBlocks(turnBlocks);
|
||||
|
||||
const choices = Array.isArray(data.choices) ? data.choices : [];
|
||||
const inputMode = data.inputMode || (choices.length > 0 ? 'choice' : 'none');
|
||||
this.dispatchChoices(choices);
|
||||
this.dispatchInputMode(inputMode);
|
||||
|
||||
await this.storeAndQueueBlocks(turnBlocks);
|
||||
|
||||
document.dispatchEvent(new CustomEvent('story:turn-complete', {
|
||||
detail: { turnId, turn: data, choices, inputMode }
|
||||
}));
|
||||
@@ -392,6 +393,9 @@ class SocketClientModule extends BaseModule {
|
||||
const glossaryEntries = markupParser && typeof markupParser.extractGlossaryTags === 'function'
|
||||
? markupParser.extractGlossaryTags(tags)
|
||||
: [];
|
||||
const ttsInstructions = markupParser && typeof markupParser.extractTtsInstructionTags === 'function'
|
||||
? markupParser.extractTtsInstructionTags(tags)
|
||||
: [];
|
||||
const cueTags = tags.filter(tag => this.isTimedCueTag(tag));
|
||||
const deferredTags = tags.filter(tag => this.isDeferredPopupTag(tag));
|
||||
const immediateTags = tags.filter(tag =>
|
||||
@@ -433,6 +437,7 @@ class SocketClientModule extends BaseModule {
|
||||
text,
|
||||
layoutText,
|
||||
glossaryEntries,
|
||||
ttsInstructions,
|
||||
cueMarkers,
|
||||
deferredTags: [
|
||||
...(Array.isArray(pending.deferredTags) ? pending.deferredTags : []),
|
||||
@@ -503,7 +508,7 @@ class SocketClientModule extends BaseModule {
|
||||
|
||||
isRenderMetadataTag(tag) {
|
||||
const key = String(tag?.key || '').toLowerCase();
|
||||
return ['gloss'].includes(key);
|
||||
return key === 'gloss' || key === 'tts' || key.startsWith('tts-');
|
||||
}
|
||||
|
||||
isDeferredPopupTag(tag) {
|
||||
|
||||
@@ -18,7 +18,8 @@ class TTSFactoryModule extends BaseModule {
|
||||
'browser-tts', // Browser TTS handler
|
||||
'kokoro-tts', // Kokoro TTS handler
|
||||
'elevenlabs-tts',// ElevenLabs TTS handler
|
||||
'openai-tts' // OpenAI TTS handler
|
||||
'openai-tts', // OpenAI TTS handler
|
||||
'local-openai-tts' // Local OpenAI-compatible TTS handler
|
||||
];
|
||||
this.handlers = {};
|
||||
this.initStatus = {};
|
||||
@@ -356,7 +357,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
}
|
||||
|
||||
// Add placeholder entries for important API handlers that might not be registered yet
|
||||
const apiHandlerIds = ['elevenlabs-tts', 'openai-tts'];
|
||||
const apiHandlerIds = ['elevenlabs-tts', 'openai-tts', 'local-openai-tts'];
|
||||
for (const id of apiHandlerIds) {
|
||||
// Only add if not already in the list
|
||||
if (!this.handlers[id] && !availableHandlers.some(h => h.id === id)) {
|
||||
@@ -407,10 +408,24 @@ class TTSFactoryModule extends BaseModule {
|
||||
'voice': '', // Empty default - will be selected based on handler
|
||||
'language': 'en_US', // Legacy stored value; game metadata now owns active TTS language
|
||||
'volume': 1.0, // Default volume
|
||||
'browser-tts_timeout_ms': 60000,
|
||||
'kokoro-tts_timeout_ms': 60000,
|
||||
'elevenlabs_api_key': '', // Empty API key by default
|
||||
'elevenlabs_api_url': 'https://api.elevenlabs.io/v1', // Default ElevenLabs API URL
|
||||
'openai_api_key': '', // Empty API key by default
|
||||
'openai_api_url': 'https://api.openai.com/v1' // Default OpenAI API URL
|
||||
'openai_api_url': 'https://api.openai.com/v1', // Default OpenAI API URL
|
||||
'elevenlabs-tts_api_key': '',
|
||||
'elevenlabs-tts_api_url': 'https://api.elevenlabs.io/v1',
|
||||
'elevenlabs-tts_timeout_ms': 60000,
|
||||
'openai-tts_api_key': '',
|
||||
'openai-tts_api_url': 'https://api.openai.com/v1',
|
||||
'openai-tts_model': 'tts-1-hd',
|
||||
'openai-tts_timeout_ms': 60000,
|
||||
'local-openai-tts_api_key': '',
|
||||
'local-openai-tts_api_url': 'http://localhost:8000/v1',
|
||||
'local-openai-tts_voice': 'alloy',
|
||||
'local-openai-tts_model': 'tts-1',
|
||||
'local-openai-tts_timeout_ms': 60000
|
||||
};
|
||||
|
||||
// Ensure all defaults are set in persistence if they don't exist
|
||||
@@ -475,7 +490,8 @@ class TTSFactoryModule extends BaseModule {
|
||||
{ id: 'kokoro-tts', displayName: 'Kokoro TTS' },
|
||||
{ id: 'browser-tts', displayName: 'Browser TTS' },
|
||||
{ id: 'elevenlabs-tts', displayName: 'ElevenLabs TTS' },
|
||||
{ id: 'openai-tts', displayName: 'OpenAI TTS' }
|
||||
{ id: 'openai-tts', displayName: 'OpenAI TTS' },
|
||||
{ id: 'local-openai-tts', displayName: 'Local OpenAI TTS' }
|
||||
];
|
||||
|
||||
// Register each handler
|
||||
@@ -780,7 +796,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
}
|
||||
|
||||
// Check if we have this speech cached
|
||||
const hash = await this.generateSpeechHash(text);
|
||||
const hash = await this.generateSpeechHash(text, options);
|
||||
const cached = await this.getCachedSpeech(hash);
|
||||
|
||||
if (cached && cached.success) {
|
||||
@@ -845,7 +861,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
|
||||
try {
|
||||
// Generate a hash for this speech request
|
||||
const hash = await this.generateSpeechHash(text);
|
||||
const hash = await this.generateSpeechHash(text, options);
|
||||
|
||||
// Check if we have this speech cached
|
||||
const cached = await this.getCachedSpeech(hash);
|
||||
@@ -1097,6 +1113,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
getHandlerStatusMessage(id, handler) {
|
||||
if (!handler) return 'Not registered';
|
||||
if (handler.isReady === true) return 'Ready';
|
||||
if (handler.unsupportedReason) return handler.unsupportedReason;
|
||||
if (id === 'kokoro-tts') return handler.state === 'INITIALIZING' ? 'Loading model' : 'Not loaded';
|
||||
if (handler.apiKey === '') return 'API key missing';
|
||||
if (handler.apiKey && handler.isReady !== true) return 'API unavailable or invalid settings';
|
||||
@@ -1234,7 +1251,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
let generationStarted = false;
|
||||
try {
|
||||
// Generate a hash for this speech request
|
||||
hash = await this.generateSpeechHash(text);
|
||||
hash = await this.generateSpeechHash(text, options);
|
||||
|
||||
// Check if we have this audio in cache
|
||||
const cachedData = await this.getCachedSpeech(hash);
|
||||
@@ -1286,17 +1303,23 @@ class TTSFactoryModule extends BaseModule {
|
||||
* @param {string} text - Text to generate hash for
|
||||
* @returns {Promise<string>} - Hash string
|
||||
*/
|
||||
async generateSpeechHash(text) {
|
||||
async generateSpeechHash(text, options = {}) {
|
||||
const handler = this.getActiveHandler();
|
||||
const provider = this.activeHandler || 'none';
|
||||
const voiceInfo = this.getEffectiveVoiceId(handler);
|
||||
const model = handler?.voiceOptions?.model || handler?.model || '';
|
||||
const speed = this.speed || 1.0;
|
||||
const language = this.language || 'en-us';
|
||||
const ttsInstruction = handler && typeof handler.getRequestInstructions === 'function'
|
||||
? handler.getRequestInstructions(options)
|
||||
: '';
|
||||
const key = JSON.stringify({
|
||||
provider,
|
||||
voice: voiceInfo,
|
||||
model,
|
||||
speed,
|
||||
language,
|
||||
ttsInstruction,
|
||||
text
|
||||
});
|
||||
|
||||
@@ -1933,7 +1956,7 @@ class TTSFactoryModule extends BaseModule {
|
||||
const handler = this.handlers[id];
|
||||
const isInitialized = !!this.initStatus[id];
|
||||
const isReady = handler && handler.isReady;
|
||||
const isApiHandler = ['elevenlabs', 'openai', 'kokoro'].includes(id);
|
||||
const isApiHandler = ['elevenlabs-tts', 'openai-tts', 'local-openai-tts', 'kokoro-tts'].includes(id);
|
||||
|
||||
console.log(`Handler ID: ${id}`);
|
||||
console.log(` - Handler Exists: ${!!handler}`);
|
||||
|
||||
@@ -387,12 +387,12 @@ class UIControllerModule extends BaseModule {
|
||||
|
||||
sliderValueFromSpeed(speed) {
|
||||
const value = Number.isFinite(Number(speed)) ? Number(speed) : 1;
|
||||
return Math.round((Math.max(0.5, Math.min(2.0, value)) * 50) + 50);
|
||||
return Math.round(Math.max(0.5, Math.min(2.0, value)) * 100);
|
||||
}
|
||||
|
||||
speedFromSliderValue(value) {
|
||||
const sliderValue = Number.isFinite(Number(value)) ? Number(value) : 50;
|
||||
return Math.max(0.5, Math.min(2.0, (sliderValue - 50) / 50));
|
||||
const sliderValue = Number.isFinite(Number(value)) ? Number(value) : 100;
|
||||
return Math.max(0.5, Math.min(2.0, sliderValue / 100));
|
||||
}
|
||||
|
||||
bindTopControls() {
|
||||
@@ -453,14 +453,13 @@ class UIControllerModule extends BaseModule {
|
||||
|
||||
if (speedSlider && speedSlider.dataset.uiControllerBound !== 'true') {
|
||||
speedSlider.dataset.uiControllerBound = 'true';
|
||||
speedSlider.min = speedSlider.min || '50';
|
||||
speedSlider.max = speedSlider.max || '150';
|
||||
speedSlider.min = '50';
|
||||
speedSlider.max = '200';
|
||||
speedSlider.addEventListener('input', (event) => {
|
||||
const persistenceManager = this.getModule('persistence-manager');
|
||||
const speed = this.speedFromSliderValue(event.target.value);
|
||||
|
||||
document.dispatchEvent(new CustomEvent('animation:speed:change', {
|
||||
detail: { speed: 1 }
|
||||
detail: { speed }
|
||||
}));
|
||||
|
||||
document.dispatchEvent(new CustomEvent('tts:speed:change', {
|
||||
|
||||
@@ -386,7 +386,7 @@ class UIDisplayHandlerModule extends BaseModule {
|
||||
controls.innerHTML = `
|
||||
<a id="speech"></a>
|
||||
<a id="autoplay"></a>
|
||||
<span><a id="speed_reset"><span id="speed_label"></span></a><input type="range" min="50" max="150" value="100" id="speed" name="speed" /></span>
|
||||
<span><a id="speed_reset"><span id="speed_label"></span></a><input type="range" min="50" max="200" value="100" id="speed" name="speed" /></span>
|
||||
<a id="rewind"></a>
|
||||
<a id="save"></a>
|
||||
<a id="reload" disabled="disabled"></a>
|
||||
|
||||
@@ -47,8 +47,12 @@
|
||||
"options.enableMusicDucking": "Musikabsenkung einschalten",
|
||||
"options.elevenLabsSettings": "ElevenLabs API-Einstellungen",
|
||||
"options.openAiSettings": "OpenAI API-Einstellungen",
|
||||
"options.localOpenAiSettings": "Lokale OpenAI API-Einstellungen",
|
||||
"options.optionalApiKey": "API-Schlüssel (optional)",
|
||||
"options.apiKey": "API-Schlüssel",
|
||||
"options.apiUrl": "API-URL",
|
||||
"options.model": "Modell",
|
||||
"options.requestTimeoutMs": "Anfrage-Timeout (ms)",
|
||||
"credits.button": "Credits",
|
||||
"credits.buttonTitle": "Mitwirkende und Lizenzen anzeigen",
|
||||
"credits.title": "Mitwirkende und Lizenzen",
|
||||
|
||||
@@ -47,8 +47,12 @@
|
||||
"options.enableMusicDucking": "Enable music ducking",
|
||||
"options.elevenLabsSettings": "ElevenLabs API Settings",
|
||||
"options.openAiSettings": "OpenAI API Settings",
|
||||
"options.localOpenAiSettings": "Local OpenAI API Settings",
|
||||
"options.apiKey": "API Key",
|
||||
"options.optionalApiKey": "API Key (optional)",
|
||||
"options.apiUrl": "API URL",
|
||||
"options.model": "Model",
|
||||
"options.requestTimeoutMs": "Request timeout (ms)",
|
||||
"credits.button": "credits",
|
||||
"credits.buttonTitle": "Show credits and third-party licenses",
|
||||
"credits.title": "Credits and Licenses",
|
||||
|
||||
@@ -23,13 +23,14 @@
|
||||
<div class="option-item">
|
||||
<label>Voice:</label>
|
||||
<select id="tts-voice" data-pref-bind="tts.voice"></select>
|
||||
<input type="text" id="local-openai-voice" data-pref-bind="tts.local-openai-tts_voice" placeholder="alloy" style="display: none;">
|
||||
</div>
|
||||
|
||||
<div class="option-item">
|
||||
<label>Speech:</label>
|
||||
<span class="slider-value">100%</span>
|
||||
<input type="range" id="tts-speed" min="50" max="200" value="100"
|
||||
data-pref-bind="app.speed" data-pref-transform="range:0.5,2.0">
|
||||
data-pref-bind="tts.speed" data-pref-transform="multiplier-percent">
|
||||
</div>
|
||||
|
||||
<!-- API Settings -->
|
||||
@@ -50,7 +51,7 @@
|
||||
</div>
|
||||
|
||||
<!-- OpenAI Settings -->
|
||||
<div class="api-settings openai-settings" style="display: none;">
|
||||
<div class="api-settings openai-tts-settings" style="display: none;">
|
||||
<h3>OpenAI API Settings</h3>
|
||||
|
||||
<div class="option-item">
|
||||
@@ -62,6 +63,41 @@
|
||||
<label>API URL:</label>
|
||||
<input type="text" id="openai-api-url" data-pref-bind="tts.openai-tts_api_url">
|
||||
</div>
|
||||
|
||||
<div class="option-item">
|
||||
<label>Model:</label>
|
||||
<select id="openai-model" data-pref-bind="tts.openai-tts_model">
|
||||
<option value="tts-1">TTS-1</option>
|
||||
<option value="tts-1-hd">TTS-1 HD</option>
|
||||
<option value="gpt-4o-mini-tts">GPT-4o mini TTS</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Local OpenAI-compatible Settings -->
|
||||
<div class="api-settings local-openai-tts-settings" style="display: none;">
|
||||
<h3>Local OpenAI API Settings</h3>
|
||||
|
||||
<div class="option-item">
|
||||
<label>API Key (optional):</label>
|
||||
<input type="password" id="local-openai-api-key" data-pref-bind="tts.local-openai-tts_api_key">
|
||||
</div>
|
||||
|
||||
<div class="option-item">
|
||||
<label>API URL:</label>
|
||||
<input type="text" id="local-openai-api-url" data-pref-bind="tts.local-openai-tts_api_url">
|
||||
</div>
|
||||
|
||||
<div class="option-item">
|
||||
<label>Model:</label>
|
||||
<input type="text" id="local-openai-model" data-pref-bind="tts.local-openai-tts_model" placeholder="tts-1">
|
||||
</div>
|
||||
|
||||
<div class="option-item">
|
||||
<label>Request timeout (ms):</label>
|
||||
<input type="number" id="local-openai-timeout-ms" min="1000" max="600000" step="1000"
|
||||
data-pref-bind="tts.local-openai-tts_timeout_ms" data-pref-transform="integer:1000,600000">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user