Update TTS providers and story markup
This commit is contained in:
@@ -27,6 +27,12 @@ class LayoutRendererModule extends BaseModule {
|
||||
'decorateInlineWord',
|
||||
'applyGlossaryEntries',
|
||||
'normalizeGlossaryText',
|
||||
'normalizeGlossaryToken',
|
||||
'normalizeGlossaryCompact',
|
||||
'buildGlossaryTermPatterns',
|
||||
'buildCompactGlossaryTermPatterns',
|
||||
'decorateGlossarySegment',
|
||||
'decorateGlossaryRange',
|
||||
'decorateGlossaryWord',
|
||||
'ensureGlossaryTooltip',
|
||||
'showGlossaryTooltip',
|
||||
@@ -337,34 +343,56 @@ class LayoutRendererModule extends BaseModule {
|
||||
|
||||
let cursor = 0;
|
||||
const segments = [];
|
||||
let compactCursor = 0;
|
||||
const compactSegments = [];
|
||||
const fullText = words.map((word, index) => {
|
||||
if (index > 0) cursor += 1;
|
||||
const start = cursor;
|
||||
cursor += word.text.length;
|
||||
segments.push({ ...word, start, end: cursor });
|
||||
|
||||
const compactText = this.normalizeGlossaryCompact(word.text);
|
||||
if (compactText) {
|
||||
const compactStart = compactCursor;
|
||||
compactCursor += compactText.length;
|
||||
compactSegments.push({ ...word, start: compactStart, end: compactCursor });
|
||||
}
|
||||
|
||||
return word.text;
|
||||
}).join(' ');
|
||||
const compactFullText = words.map(word => this.normalizeGlossaryCompact(word.text)).join('');
|
||||
|
||||
entries
|
||||
.filter(entry => entry && entry.term && entry.definition)
|
||||
.forEach(entry => {
|
||||
const normalizedTerm = this.normalizeGlossaryText(entry.term);
|
||||
if (!normalizedTerm) return;
|
||||
|
||||
const matcher = new RegExp(`(^|\\s)(${this.escapeRegExp(normalizedTerm)})(?=\\s|$|[.,;:!?])`, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(fullText)) !== null) {
|
||||
const matchStart = match.index + match[1].length;
|
||||
const matchEnd = matchStart + match[2].length;
|
||||
segments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossaryWord(segment.element, entry));
|
||||
}
|
||||
this.buildGlossaryTermPatterns(entry.term).forEach((pattern) => {
|
||||
const matcher = new RegExp(`(^|\\s)(${pattern})(?=\\s|$|[.,;:!?])`, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(fullText)) !== null) {
|
||||
const matchStart = match.index + match[1].length;
|
||||
const matchEnd = matchStart + match[2].length;
|
||||
segments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'text'));
|
||||
}
|
||||
});
|
||||
this.buildCompactGlossaryTermPatterns(entry.term).forEach((pattern) => {
|
||||
const matcher = new RegExp(pattern, 'giu');
|
||||
let match;
|
||||
while ((match = matcher.exec(compactFullText)) !== null) {
|
||||
const matchStart = match.index;
|
||||
const matchEnd = matchStart + match[0].length;
|
||||
compactSegments
|
||||
.filter(segment => segment.end > matchStart && segment.start < matchEnd)
|
||||
.forEach(segment => this.decorateGlossarySegment(segment, entry, matchStart, matchEnd, 'compact'));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
normalizeGlossaryText(text) {
|
||||
return String(text || '')
|
||||
.normalize('NFC')
|
||||
.replace(/\u200c/g, '')
|
||||
.replace(/\u00ad/g, '')
|
||||
.replace(/-\s*$/g, '')
|
||||
@@ -372,6 +400,157 @@ class LayoutRendererModule extends BaseModule {
|
||||
.trim();
|
||||
}
|
||||
|
||||
normalizeGlossaryToken(text) {
|
||||
return this.normalizeGlossaryText(text)
|
||||
.replace(/^[.,;:!?()[\]{}"'„“”‚‘’»«]+|[.,;:!?()[\]{}"'„“”‚‘’»«]+$/g, '');
|
||||
}
|
||||
|
||||
normalizeGlossaryCompact(text) {
|
||||
return this.normalizeGlossaryToken(text)
|
||||
.replace(/[-\s]+/g, '')
|
||||
.replace(/[.,;:!?()[\]{}"'„“”‚‘’»«]+/g, '');
|
||||
}
|
||||
|
||||
buildGlossaryTermPatterns(term) {
|
||||
const normalizedTerm = this.normalizeGlossaryText(term);
|
||||
if (!normalizedTerm) return [];
|
||||
|
||||
const exact = normalizedTerm
|
||||
.split(/\s+/)
|
||||
.map(token => this.escapeRegExp(this.normalizeGlossaryToken(token)))
|
||||
.filter(Boolean)
|
||||
.join('\\s+');
|
||||
if (!exact) return [];
|
||||
|
||||
const inflected = normalizedTerm
|
||||
.split(/\s+/)
|
||||
.map((token, index, tokens) => {
|
||||
const normalized = this.normalizeGlossaryToken(token);
|
||||
if (!normalized) return '';
|
||||
const escaped = this.escapeRegExp(normalized);
|
||||
const isLast = index === tokens.length - 1;
|
||||
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
|
||||
})
|
||||
.filter(Boolean)
|
||||
.join('\\s+');
|
||||
|
||||
return [...new Set([exact, inflected])];
|
||||
}
|
||||
|
||||
buildCompactGlossaryTermPatterns(term) {
|
||||
const tokens = this.normalizeGlossaryText(term)
|
||||
.split(/\s+/)
|
||||
.map(token => this.normalizeGlossaryCompact(token))
|
||||
.filter(Boolean);
|
||||
if (tokens.length === 0) return [];
|
||||
|
||||
const exact = tokens.map(token => this.escapeRegExp(token)).join('');
|
||||
const inflected = tokens
|
||||
.map((token, index) => {
|
||||
const escaped = this.escapeRegExp(token);
|
||||
const isLast = index === tokens.length - 1;
|
||||
return isLast ? `${escaped}(?:s|es|e|en|er|n)?` : `${escaped}(?:e|en|er|es|n)?`;
|
||||
})
|
||||
.join('');
|
||||
|
||||
return [...new Set([exact, inflected])];
|
||||
}
|
||||
|
||||
decorateGlossarySegment(segment, entry, matchStart, matchEnd, mode = 'text') {
|
||||
if (!segment?.element || !entry?.definition) return;
|
||||
|
||||
const localStart = Math.max(0, matchStart - segment.start);
|
||||
const localEnd = Math.min(segment.end - segment.start, matchEnd - segment.start);
|
||||
if (localEnd <= localStart) return;
|
||||
|
||||
const segmentLength = mode === 'compact'
|
||||
? this.normalizeGlossaryCompact(segment.text).length
|
||||
: segment.text.length;
|
||||
|
||||
if (localStart <= 0 && localEnd >= segmentLength) {
|
||||
this.decorateGlossaryWord(segment.element, entry);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mode === 'compact') {
|
||||
return;
|
||||
}
|
||||
|
||||
this.decorateGlossaryRange(segment.element, entry, localStart, localEnd);
|
||||
}
|
||||
|
||||
decorateGlossaryRange(word, entry, start, end) {
|
||||
if (!word || !entry?.definition) return;
|
||||
|
||||
const text = word.textContent || '';
|
||||
const safeStart = Math.max(0, Math.min(text.length, start));
|
||||
const safeEnd = Math.max(safeStart, Math.min(text.length, end));
|
||||
if (safeStart === 0 && safeEnd >= text.length) {
|
||||
this.decorateGlossaryWord(word, entry);
|
||||
return;
|
||||
}
|
||||
if (safeEnd <= safeStart) return;
|
||||
|
||||
word.dataset.glossaryPartial = 'true';
|
||||
|
||||
const textNodes = [];
|
||||
const filter = window.NodeFilter || NodeFilter;
|
||||
const walker = document.createTreeWalker(word, filter.SHOW_TEXT);
|
||||
let node;
|
||||
while ((node = walker.nextNode())) {
|
||||
textNodes.push(node);
|
||||
}
|
||||
|
||||
let offset = 0;
|
||||
textNodes.forEach((textNode) => {
|
||||
const nodeText = textNode.nodeValue || '';
|
||||
const nodeStart = offset;
|
||||
const nodeEnd = nodeStart + nodeText.length;
|
||||
offset = nodeEnd;
|
||||
|
||||
const overlapStart = Math.max(safeStart, nodeStart);
|
||||
const overlapEnd = Math.min(safeEnd, nodeEnd);
|
||||
if (overlapEnd <= overlapStart || !textNode.parentNode) return;
|
||||
|
||||
const localStart = overlapStart - nodeStart;
|
||||
const localEnd = overlapEnd - nodeStart;
|
||||
const before = nodeText.slice(0, localStart);
|
||||
const matched = nodeText.slice(localStart, localEnd);
|
||||
const after = nodeText.slice(localEnd);
|
||||
const parent = textNode.parentNode;
|
||||
|
||||
if (before) {
|
||||
parent.insertBefore(document.createTextNode(before), textNode);
|
||||
}
|
||||
|
||||
if (matched) {
|
||||
const gloss = document.createElement('span');
|
||||
gloss.textContent = matched;
|
||||
this.decorateGlossaryWord(gloss, entry);
|
||||
parent.insertBefore(gloss, textNode);
|
||||
}
|
||||
|
||||
if (after) {
|
||||
parent.insertBefore(document.createTextNode(after), textNode);
|
||||
}
|
||||
|
||||
parent.removeChild(textNode);
|
||||
});
|
||||
|
||||
if (textNodes.length === 0) {
|
||||
const before = text.slice(0, safeStart);
|
||||
const matched = text.slice(safeStart, safeEnd);
|
||||
const after = text.slice(safeEnd);
|
||||
word.textContent = '';
|
||||
if (before) word.appendChild(document.createTextNode(before));
|
||||
const gloss = document.createElement('span');
|
||||
gloss.textContent = matched;
|
||||
this.decorateGlossaryWord(gloss, entry);
|
||||
word.appendChild(gloss);
|
||||
if (after) word.appendChild(document.createTextNode(after));
|
||||
}
|
||||
}
|
||||
|
||||
decorateGlossaryWord(word, entry) {
|
||||
if (!word || !entry?.definition) return;
|
||||
word.classList.add('story-glossary-word');
|
||||
|
||||
Reference in New Issue
Block a user