Update TTS providers and story markup

This commit is contained in:
2026-05-20 22:13:31 +02:00
parent b911c40d89
commit 8258ea2321
36 changed files with 1482 additions and 197 deletions
+191 -12
View File
@@ -27,6 +27,12 @@ class LayoutRendererModule extends BaseModule {
'decorateInlineWord',
'applyGlossaryEntries',
'normalizeGlossaryText',
'normalizeGlossaryToken',
'normalizeGlossaryCompact',
'buildGlossaryTermPatterns',
'buildCompactGlossaryTermPatterns',
'decorateGlossarySegment',
'decorateGlossaryRange',
'decorateGlossaryWord',
'ensureGlossaryTooltip',
'showGlossaryTooltip',
@@ -337,34 +343,56 @@ class LayoutRendererModule extends BaseModule {
let cursor = 0;
const segments = [];
let compactCursor = 0;
const compactSegments = [];
const fullText = words.map((word, index) => {
if (index > 0) cursor += 1;
const start = cursor;
cursor += word.text.length;
segments.push({ ...word, start, end: cursor });
const compactText = this.normalizeGlossaryCompact(word.text);
if (compactText) {
const compactStart = compactCursor;
compactCursor += compactText.length;
compactSegments.push({ ...word, start: compactStart, end: compactCursor });
}
return word.text;
}).join(' ');
const compactFullText = words.map(word => this.normalizeGlossaryCompact(word.text)).join('');
entries
.filter(entry => entry && entry.term && entry.definition)
.forEach(entry => {
const normalizedTerm = this.normalizeGlossaryText(entry.term);
if (!normalizedTerm) return;
const matcher = new RegExp(`(^|\\s)(${this.escapeRegExp(normalizedTerm)})(?=\\s|$|[.,;:!?])`, 'giu');
let match;
while ((match = matcher.exec(fullText)) !== null) {
const matchStart = match.index + match[1].length;
const matchEnd = matchStart + match[2].length;
segments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossaryWord(segment.element, entry));
}
this.buildGlossaryTermPatterns(entry.term).forEach((pattern) => {
const matcher = new RegExp(`(^|\\s)(${pattern})(?=\\s|$|[.,;:!?])`, 'giu');
let match;
while ((match = matcher.exec(fullText)) !== null) {
const matchStart = match.index + match[1].length;
const matchEnd = matchStart + match[2].length;
segments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'text'));
}
});
this.buildCompactGlossaryTermPatterns(entry.term).forEach((pattern) => {
const matcher = new RegExp(pattern, 'giu');
let match;
while ((match = matcher.exec(compactFullText)) !== null) {
const matchStart = match.index;
const matchEnd = matchStart + match[0].length;
compactSegments
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'compact'));
}
});
});
}
normalizeGlossaryText(text) {
return String(text || '')
.normalize('NFC')
.replace(/\u200c/g, '')
.replace(/\u00ad/g, '')
.replace(/-\s*$/g, '')
@@ -372,6 +400,157 @@ class LayoutRendererModule extends BaseModule {
.trim();
}
normalizeGlossaryToken(text) {
return this.normalizeGlossaryText(text)
.replace(/^[.,;:!?()[\]{}"'„“”‚‘’»«]+|[.,;:!?()[\]{}"'„“”‚‘’»«]+$/g, '');
}
normalizeGlossaryCompact(text) {
return this.normalizeGlossaryToken(text)
.replace(/[-\s]+/g, '')
.replace(/[.,;:!?()[\]{}"'„“”‚‘’»«]+/g, '');
}
buildGlossaryTermPatterns(term) {
const normalizedTerm = this.normalizeGlossaryText(term);
if (!normalizedTerm) return [];
const exact = normalizedTerm
.split(/\s+/)
.map(token => this.escapeRegExp(this.normalizeGlossaryToken(token)))
.filter(Boolean)
.join('\\s+');
if (!exact) return [];
const inflected = normalizedTerm
.split(/\s+/)
.map((token, index, tokens) => {
const normalized = this.normalizeGlossaryToken(token);
if (!normalized) return '';
const escaped = this.escapeRegExp(normalized);
const isLast = index === tokens.length - 1;
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
})
.filter(Boolean)
.join('\\s+');
return [...new Set([exact, inflected])];
}
buildCompactGlossaryTermPatterns(term) {
const tokens = this.normalizeGlossaryText(term)
.split(/\s+/)
.map(token => this.normalizeGlossaryCompact(token))
.filter(Boolean);
if (tokens.length === 0) return [];
const exact = tokens.map(token => this.escapeRegExp(token)).join('');
const inflected = tokens
.map((token, index) => {
const escaped = this.escapeRegExp(token);
const isLast = index === tokens.length - 1;
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
})
.join('');
return [...new Set([exact, inflected])];
}
decorateGlossarySegment(segment, entry, matchStart, matchEnd, mode = 'text') {
if (!segment?.element || !entry?.definition) return;
const localStart = Math.max(0, matchStart - segment.start);
const localEnd = Math.min(segment.end - segment.start, matchEnd - segment.start);
if (localEnd <= localStart) return;
const segmentLength = mode === 'compact'
? this.normalizeGlossaryCompact(segment.text).length
: segment.text.length;
if (localStart <= 0 && localEnd >= segmentLength) {
this.decorateGlossaryWord(segment.element, entry);
return;
}
if (mode === 'compact') {
return;
}
this.decorateGlossaryRange(segment.element, entry, localStart, localEnd);
}
decorateGlossaryRange(word, entry, start, end) {
if (!word || !entry?.definition) return;
const text = word.textContent || '';
const safeStart = Math.max(0, Math.min(text.length, start));
const safeEnd = Math.max(safeStart, Math.min(text.length, end));
if (safeStart === 0 && safeEnd >= text.length) {
this.decorateGlossaryWord(word, entry);
return;
}
if (safeEnd <= safeStart) return;
word.dataset.glossaryPartial = 'true';
const textNodes = [];
const filter = window.NodeFilter || NodeFilter;
const walker = document.createTreeWalker(word, filter.SHOW_TEXT);
let node;
while ((node = walker.nextNode())) {
textNodes.push(node);
}
let offset = 0;
textNodes.forEach((textNode) => {
const nodeText = textNode.nodeValue || '';
const nodeStart = offset;
const nodeEnd = nodeStart + nodeText.length;
offset = nodeEnd;
const overlapStart = Math.max(safeStart, nodeStart);
const overlapEnd = Math.min(safeEnd, nodeEnd);
if (overlapEnd <= overlapStart || !textNode.parentNode) return;
const localStart = overlapStart - nodeStart;
const localEnd = overlapEnd - nodeStart;
const before = nodeText.slice(0, localStart);
const matched = nodeText.slice(localStart, localEnd);
const after = nodeText.slice(localEnd);
const parent = textNode.parentNode;
if (before) {
parent.insertBefore(document.createTextNode(before), textNode);
}
if (matched) {
const gloss = document.createElement('span');
gloss.textContent = matched;
this.decorateGlossaryWord(gloss, entry);
parent.insertBefore(gloss, textNode);
}
if (after) {
parent.insertBefore(document.createTextNode(after), textNode);
}
parent.removeChild(textNode);
});
if (textNodes.length === 0) {
const before = text.slice(0, safeStart);
const matched = text.slice(safeStart, safeEnd);
const after = text.slice(safeEnd);
word.textContent = '';
if (before) word.appendChild(document.createTextNode(before));
const gloss = document.createElement('span');
gloss.textContent = matched;
this.decorateGlossaryWord(gloss, entry);
word.appendChild(gloss);
if (after) word.appendChild(document.createTextNode(after));
}
}
decorateGlossaryWord(word, entry) {
if (!word || !entry?.definition) return;
word.classList.add('story-glossary-word');