/**
 * @license Hyphenopoly.module.js 6.0.0 - hyphenation for node
 * ©2024 Mathias Nater, Güttingen (mathiasnater at gmail dot com)
 * https://github.com/mnater/Hyphenopoly
 *
 * Released under the MIT license
 * http://mnater.github.io/Hyphenopoly/LICENSE
 */

/* eslint-env node */

/**
 * Decode a buffer of UTF-16LE code units into a string.
 * The TextDecoder is created once and reused for every call.
 * @param {Uint16Array} ui16 The code units to decode
 * @returns {string} The decoded string
 */
const decode = (() => {
    const utf16ledecoder = new TextDecoder("utf-16le");
    return (ui16) => {
        return utf16ledecoder.decode(ui16);
    };
})();

/**
 * Create Object without standard Object-prototype
 * @returns {Object} empty object
 */
const empty = () => {
    return Object.create(null);
};

// The module object: carries config (H.c), events and the language objects.
const H = empty();

H.supportedLanguages = [
    "af", "as", "be", "bg", "bn", "ca", "cs", "cy", "da", "de",
    "de-x-syllable", "el-monoton", "el-polyton", "en-gb", "en-us", "eo",
    "es", "et", "eu", "fi", "fo", "fr", "fur", "ga", "gl", "gu", "hi", "hr",
    "hsb", "hu", "hy", "ia", "id", "is", "it", "ka", "kmr", "kn", "la", "lt",
    "lv", "mk", "ml", "mn-cyrl", "mr", "nb", "nl", "nn", "no", "oc", "or",
    "pa", "pi", "pl", "pms", "pt", "rm", "ro", "ru", "sh-cyrl", "sh-latn",
    "sk", "sl", "sq", "sr-cyrl", "sv", "ta", "te", "th", "tk", "tr", "uk",
    "zh-latn-pinyin"
];

// Language objects, keyed by language tag.
H.languages = new Map();

/**
 * Get the language object for `lang`, creating an empty one on first use.
 * @param {string} lang The language
 * @returns {Object} The (possibly newly created) lang object
 */
function createLangObj(lang) {
    if (!H.languages.has(lang)) {
        H.languages.set(lang, empty());
    }
    return H.languages.get(lang);
}

/**
 * Setup a language object (lo) and dispatch "engineReady".
 * The language object is only populated once (guarded by lo.engineReady);
 * the "engineReady" event is dispatched on every call.
 * @param {string} lang The language
 * @param {function} hyphenateFunction The hyphenateFunction
 * @param {string} alphabet List of used characters
 * @param {number} patternLeftmin leftmin as defined in patterns
 * @param {number} patternRightmin rightmin as defined in patterns
 * @returns {undefined}
 */
function prepareLanguagesObj(
    lang,
    hyphenateFunction,
    alphabet,
    patternLeftmin,
    patternRightmin
) {
    // The alphabet ends up inside a RegExp character class: escape every "-".
    const escapedAlphabet = alphabet.replace(/\\*-/g, "\\-");
    const lo = createLangObj(lang);
    if (!lo.engineReady) {
        lo.cache = new Map();
        const exc = [];
        if (H.c.exceptions.has(lang)) {
            exc.push(...H.c.exceptions.get(lang).split(", "));
        }
        if (H.c.exceptions.has("global")) {
            exc.push(...H.c.exceptions.get("global").split(", "));
        }
        // Map from the plain word to its pre-hyphenated exception form.
        lo.exceptions = new Map(exc.map((e) => {
            return [e.replace(/-/g, ""), e];
        }));
        lo.alphabet = escapedAlphabet;
        lo.reNotAlphabet = RegExp(`[^${escapedAlphabet}]`, "i");
        lo.lm = Math.max(
            patternLeftmin,
            H.c.leftmin,
            H.c.leftminPerLang.get(lang) || 0
        );
        lo.rm = Math.max(
            patternRightmin,
            H.c.rightmin,
            H.c.rightminPerLang.get(lang) || 0
        );
        lo.hyphenate = hyphenateFunction;
        lo.engineReady = true;
    }
    H.events.dispatch("engineReady", {"msg": lang});
}

/**
 * Setup env for hyphenateFunction
 * @param {ArrayBuffer} buf The memory buffer shared with the wasm engine
 * @param {function} hyphenateFunc hyphenateFunction exported by the engine
 * @returns {function} hyphenateFunction with closured environment
 */
function encloseHyphenateFunction(buf, hyphenateFunc) {
    // View on the first 64 code units of the buffer: the word goes in here.
    const wordStore = new Uint16Array(buf, 0, 64);

    /**
     * The hyphenateFunction that encloses the env above
     * Copies the word (zero-terminated) to wasm-Memory, calls
     * hyphenateFunc and, if it reports a positive length, reads the
     * hyphenated word back from the start of the same buffer.
     * @param {String} word - the word that has to be hyphenated
     * @param {Number} hyphencc - char code of the hyphenate character
     * @param {Number} leftmin - min number of chars to remain on line
     * @param {Number} rightmin - min number of chars to go to new line
     * @returns {String} the hyphenated word (or the unchanged word)
     */
    return ((word, hyphencc, leftmin, rightmin) => {
        wordStore.set([
            ...[...word].map((c) => {
                return c.charCodeAt(0);
            }),
            0
        ]);
        const len = hyphenateFunc(leftmin, rightmin, hyphencc);
        if (len > 0) {
            return decode(new Uint16Array(buf, 0, len));
        }
        return word;
    });
}

/**
 * Instantiate Wasm Engine
 * Synchronously when H.c.sync is set, otherwise asynchronously. A failing
 * asynchronous instantiation is reported as a keyed "error" event.
 * @param {string} lang The language
 * @param {ArrayBuffer} wasmdata The bytes of the .wasm hyphenEngine
 * @returns {undefined}
 */
function instantiateWasmEngine(lang, wasmdata) {
    /**
     * Register character substitutions in the .wasm-hyphenEngine
     * @param {number} alphalen - The length of the alphabet
     * @param {object} exp - Export-object of the hyphenEngine
     * @returns {number} The alphabet length after all substitutions
     */
    function registerSubstitutions(alphalen, exp) {
        let currentLen = alphalen;
        if (H.c.substitute.has(lang)) {
            const subst = H.c.substitute.get(lang);
            subst.forEach((substituer, substituted) => {
                const substitutedU = substituted.toUpperCase();
                // 0 signals "no distinct upper case form".
                const substitutedUcc = (substitutedU === substituted)
                    ? 0
                    : substitutedU.charCodeAt(0);
                currentLen = exp.subst(
                    substituted.charCodeAt(0),
                    substitutedUcc,
                    substituer.charCodeAt(0)
                );
            });
        }
        return currentLen;
    }

    /**
     * Wire an instantiated hyphenEngine into the language object
     * @param {WebAssembly.Instance} inst - The engine instance
     * @returns {undefined}
     */
    function handleWasm(inst) {
        const exp = inst.exports;
        const alphalen = registerSubstitutions(exp.lct.value, exp);
        prepareLanguagesObj(
            lang,
            encloseHyphenateFunction(
                exp.mem.buffer,
                exp.hyphenate
            ),
            // The alphabet is read from byte offset 1664 of engine memory.
            decode(new Uint16Array(exp.mem.buffer, 1664, alphalen)),
            exp.lmi.value,
            exp.rmi.value
        );
    }

    if (H.c.sync) {
        handleWasm(new WebAssembly.Instance(
            new WebAssembly.Module(wasmdata)
        ));
        return;
    }
    WebAssembly.instantiate(wasmdata).then((res) => {
        handleWasm(res.instance);
    }).catch((err) => {
        // Without this handler the rejection is unhandled and the promise
        // handed out by H.config for this language never settles.
        const reason = (err && err.message) ? err.message : String(err);
        H.events.dispatch("error", {
            "key": lang,
            "msg": `${lang}.wasm could not be instantiated: ${reason}`
        });
    });
}

/**
 * Read a .wasm file and call instantiateWasmEngine on success.
 * Uses H.c.loaderSync in sync mode and H.c.loader otherwise; both receive
 * the file name and the URL of the patterns directory next to this module.
 * @param {string} lang - The language
 * @returns {undefined}
 */
function loadHyphenEngine(lang) {
    const file = `${lang}.wasm`;
    const patternDir = new URL("./patterns/", import.meta.url);
    // eslint-disable-next-line require-jsdoc
    const cb = (err, data) => {
        // A loader that yields nothing (e.g. the default loader returning
        // null) is treated like a failed load instead of crashing later.
        if (err || data === null || typeof data === "undefined") {
            H.events.dispatch("error", {
                "key": lang,
                "msg": `${lang}.wasm not found.`
            });
        } else {
            instantiateWasmEngine(lang, new Uint8Array(data).buffer);
        }
    };

    // Validate the loader that is actually going to be called.
    const activeLoader = (H.c.sync)
        ? H.c.loaderSync
        : H.c.loader;
    if (typeof activeLoader !== "function") {
        H.events.dispatch("error", {
            "key": lang,
            "msg": "Loader must be a function."
        });
        return;
    }

    if (H.c.sync) {
        cb(null, H.c.loaderSync(file, patternDir));
    } else {
        // Promise.resolve tolerates loaders returning a plain value.
        Promise.resolve(H.c.loader(file, patternDir)).then(
            (res) => {
                cb(null, res);
            },
            (err) => {
                cb(err, null);
            }
        );
    }
}

// One word-hyphenator per language, created lazily.
const wordHyphenatorPool = new Map();

/**
 * Factory for hyphenatorFunctions for a specific language
 * @param {Object} lo Language-Object
 * @param {string} lang The language
 * @returns {function} The hyphenate function
 */
function createWordHyphenator(lo, lang) {
    if (wordHyphenatorPool.has(lang)) {
        return wordHyphenatorPool.get(lang);
    }

    /**
     * HyphenateFunction for non-compound words
     * Words longer than 61 characters raise an "error" event; words with
     * characters outside the language alphabet are returned unchanged.
     * @param {string} word The word
     * @returns {string} The hyphenated word
     */
    function hyphenateNormal(word) {
        if (word.length > 61) {
            H.events.dispatch(
                "error",
                {"msg": "found word longer than 61 characters"}
            );
            return word;
        }
        if (lo.reNotAlphabet.test(word)) {
            return word;
        }
        return lo.hyphenate(
            word,
            H.c.hyphen.charCodeAt(0),
            lo.lm,
            lo.rm
        );
    }

    /**
     * HyphenateFunction for compound words
     * Parts are hyphenated unless H.c.compound is "hyphen"; a zero width
     * space follows each "-" unless H.c.compound is "auto".
     * @param {string} word The word
     * @returns {string} The hyphenated compound word
     */
    function hyphenateCompound(word) {
        const parts = word.split("-").map((p) => {
            if (H.c.compound !== "hyphen" && p.length >= H.c.minWordLength) {
                return createWordHyphenator(lo, lang)(p);
            }
            return p;
        });
        const joiner = (H.c.compound === "auto")
            ? "-"
            // Add Zero Width Space
            : "-\u200B";
        return parts.join(joiner);
    }

    /**
     * Checks if a string is mixed case
     * @param {string} s The string
     * @returns {boolean} true if s is mixed case
     */
    function isMixedCase(s) {
        const lowerFlags = s.split("").map((c) => {
            return (c === c.toLowerCase());
        });
        return lowerFlags.some((flag) => {
            return (flag !== lowerFlags[0]);
        });
    }

    /**
     * HyphenateFunction for words (compound or not)
     * Results are cached per language object.
     * @param {string} word The word
     * @returns {string} The hyphenated word
     */
    function hyphenator(word) {
        let hw = lo.cache.get(word);
        if (!hw) {
            if (lo.exceptions.has(word)) {
                hw = lo.exceptions.get(word).replace(
                    /-/g,
                    H.c.hyphen
                );
            } else if (!H.c.mixedCase && isMixedCase(word)) {
                hw = word;
            } else if (word.includes("-")) {
                hw = hyphenateCompound(word);
            } else {
                hw = hyphenateNormal(word);
            }
            lo.cache.set(word, hw);
        }
        return hw;
    }
    wordHyphenatorPool.set(lang, hyphenator);
    return hyphenator;
}

const orphanController = (() => {
    /**
     * Replace-callback that removes the hyphen character from the last
     * word of a text and, for orphanControl 3, turns a single leading
     * space into a no-break space.
     * @param {string} ignore unused result of replace
     * @param {string} leadingWhiteSpace The leading whiteSpace
     * @param {string} lastWord The last word
     * @param {string} trailingWhiteSpace The trailing whiteSpace
     * @returns {string} Treated end of text
     */
    function controlOrphans(
        ignore,
        leadingWhiteSpace,
        lastWord,
        trailingWhiteSpace
    ) {
        // \u00A0 = no-break space (nbsp)
        const lead = (H.c.orphanControl === 3 && leadingWhiteSpace === " ")
            ? "\u00A0"
            : leadingWhiteSpace;
        // Literal removal: works for any hyphen string (also multi-char
        // ones containing RegExp metacharacters) without building a RegExp.
        const unhyphenated = lastWord.split(H.c.hyphen).join("");
        return lead + unhyphenated + trailingWhiteSpace;
    }
    return controlOrphans;
})();

/**
 * Encloses hyphenateTextFunction
 * @param {string} lang - The language
 * @return {function} The hyphenateText-function
 */
function createTextHyphenator(lang) {
    const lo = H.languages.get(lang);
    const wordHyphenator = (wordHyphenatorPool.has(lang))
        ? wordHyphenatorPool.get(lang)
        : createWordHyphenator(lo, lang);

    /*
     * Transpiled RegExp of
     * /[${alphabet}\p{Letter}-]{${minwordlength},}/gui
     */
    const reWord = RegExp(
        `[${lo.alphabet}a-z\u0300-\u036F\u0483-\u0487\u00DF-\u00F6\u00F8-\u00FE\u0101\u0103\u0105\u0107\u0109\u010D\u010F\u0111\u0113\u0117\u0119\u011B\u011D\u011F\u0123\u0125\u012B\u012F\u0131\u0135\u0137\u013C\u013E\u0142\u0144\u0146\u0148\u014D\u0151\u0153\u0155\u0159\u015B\u015D\u015F\u0161\u0165\u016B\u016D\u016F\u0171\u0173\u017A\u017C\u017E\u017F\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u0219\u021B\u02BC\u0390\u03AC-\u03CE\u03D0\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF\u03F2\u0430-\u044F\u0451-\u045C\u045E\u045F\u0491\u04AF\u04E9\u0561-\u0585\u0587\u0905-\u090C\u090F\u0910\u0913-\u0928\u092A-\u0930\u0932\u0933\u0935-\u0939\u093D\u0960\u0961\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A85-\u0A8B\u0A8F\u0A90\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B60\u0B61\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60\u0D61\u0D7A-\u0D7F\u0E01-\u0E2E\u0E30\u0E32\u0E33\u0E40-\u0E45\u10D0-\u10F0\u1200-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1E0D\u1E37\u1E41\u1E43\u1E45\u1E47\u1E6D\u1F00-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB2-\u1FB4\u1FB6\u1FB7\u1FC2-\u1FC4\u1FC6\u1FC7\u1FD2\u1FD3\u1FD6\u1FD7\u1FE2-\u1FE7\u1FF2-\u1FF4\u1FF6\u1FF7\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CC9\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E-]{${H.c.minWordLength},}`,
        "gui"
    );

    /**
     * Hyphenate text
     * Optionally NFC-normalizes the text, hyphenates every matching word
     * and applies orphan control unless H.c.orphanControl is 1.
     * @param {string} text The text
     * @returns {string} Hyphenated text
     */
    return ((text) => {
        const source = (H.c.normalize)
            ? text.normalize("NFC")
            : text;
        const hyphenated = source.replace(reWord, wordHyphenator);
        if (H.c.orphanControl === 1) {
            return hyphenated;
        }
        return hyphenated.replace(
            /(\u0020*)(\S+)(\s*)$/,
            orphanController
        );
    });
}

(() => {
    // Events known to the system
    const definedEvents = new Map();

    /**
     * Create Event Object
     * @param {string} name The Name of the event
     * @param {function|null} defFunc The default method of the event
     * @param {boolean} cancellable Is the default cancellable
     * @returns {undefined}
     */
    function define(name, defFunc, cancellable) {
        definedEvents.set(name, {
            cancellable,
            "default": defFunc,
            "register": []
        });
    }

    define(
        "error",
        (e) => {
            // eslint-disable-next-line no-console
            console.error(e.msg);
        },
        true
    );

    define(
        "engineReady",
        null,
        false
    );

    /**
     * Dispatch event with arguments
     * Calls every registered handler, then the default handler unless a
     * handler called data.preventDefault().
     * @param {string} name The name of the event
     * @param {Object|undefined} data Data of the event
     * @returns {undefined}
     */
    function dispatch(name, data = {}) {
        data.defaultPrevented = false;
        data.preventDefault = (() => {
            data.defaultPrevented = true;
        });
        const eventDef = definedEvents.get(name);
        eventDef.register.forEach((currentHandler) => {
            currentHandler(data);
        });
        if (!data.defaultPrevented && eventDef.default) {
            eventDef.default(data);
        }
    }

    /**
     * Add EventListener to event
     * Unknown event names are reported through an "error" event.
     * @param {string} name The name of the event
     * @param {function} handler Function to register
     * @returns {undefined}
     */
    function addListener(name, handler) {
        if (definedEvents.has(name)) {
            definedEvents.get(name).register.push(handler);
        } else {
            H.events.dispatch(
                "error",
                {"msg": `unknown Event "${name}" discarded`}
            );
        }
    }

    H.events = empty();
    H.events.dispatch = dispatch;
    H.events.addListener = addListener;
})();

/**
 * Create a Map with a default Map behind the scenes. This mimics
 * kind of a prototype chain of an object, but without the object-
 * injection security risk.
 *
 * @param {Map} defaultsMap - A Map with default values
 * @returns {Proxy} - A Proxy for the Map (dot-notation or get/set)
 */
function createMapWithDefaults(defaultsMap) {
    const userMap = new Map();

    /**
     * The get-trap: get the value from userMap or else from defaults
     * @param {String} key - The key to retrieve the value for
     * @returns {*} The user value if one was set, else the default
     */
    function get(key) {
        return (userMap.has(key))
            ? userMap.get(key)
            : defaultsMap.get(key);
    }

    /**
     * The set-trap: set the value to userMap and don't touch defaults
     * @param {String} key - The key for the value
     * @param {*} value - The value
     * @returns {undefined}
     */
    function set(key, value) {
        userMap.set(key, value);
    }
    return new Proxy(defaultsMap, {
        "get": (_target, prop) => {
            if (prop === "set") {
                return set;
            }
            if (prop === "get") {
                return get;
            }
            // Dot-notation access: settings.foo === settings.get("foo")
            return get(prop);
        }
    });
}

/**
 * Default loader emits error
 * @returns {null} Always null: nothing has been loaded
 */
function defaultLoader() {
    H.events.dispatch("error", {
        "msg": "loader/loaderSync has not been configured."
    });
    return null;
}

/**
 * Configure Hyphenopoly and start loading the required languages.
 * @param {Object} userConfig The user settings, merged over the defaults
 * @returns {Map} Map from language to a text-hyphenator function (sync
 * mode, only for engines that became ready) or to a Promise resolving to
 * that function (async mode)
 */
H.config = ((userConfig) => {
    const settings = createMapWithDefaults(new Map([
        ["compound", "hyphen"],
        ["exceptions", new Map()],
        ["hyphen", "\u00AD"],
        ["leftmin", 0],
        ["leftminPerLang", new Map()],
        ["loader", defaultLoader],
        ["loaderSync", defaultLoader],
        ["minWordLength", 6],
        ["mixedCase", true],
        ["normalize", false],
        ["orphanControl", 1],
        ["require", []],
        ["rightmin", 0],
        ["rightminPerLang", new Map()],
        ["substitute", new Map()],
        ["sync", false]
    ]));
    Object.entries(userConfig).forEach(([key, value]) => {
        switch (key) {
        case "exceptions":
        case "leftminPerLang":
        case "paths":
        case "rightminPerLang": {
            // "paths" has no default Map; create one on demand instead of
            // calling .set on undefined.
            let target = settings.get(key);
            if (!(target instanceof Map)) {
                target = new Map();
                settings.set(key, target);
            }
            Object.entries(value).forEach(([k, v]) => {
                target.set(k, v);
            });
            break;
        }
        case "substitute":
            Object.entries(value).forEach(([lang, subst]) => {
                settings.substitute.set(
                    lang,
                    new Map(Object.entries(subst))
                );
            });
            break;
        default:
            settings.set(key, value);
        }
    });
    H.c = settings;
    if (H.c.handleEvent) {
        Object.entries(H.c.handleEvent).forEach(([name, fn]) => {
            H.events.addListener(name, fn);
        });
    }
    const result = new Map();
    if (H.c.require.length === 0) {
        H.events.dispatch(
            "error",
            {"msg": "No language has been required. Setup config according to documenation."}
        );
    }
    H.c.require.forEach((lang) => {
        if (H.c.sync) {
            H.events.addListener("engineReady", (e) => {
                if (e.msg === lang) {
                    result.set(lang, createTextHyphenator(lang));
                }
            });
        } else {
            const prom = new Promise((resolve, reject) => {
                H.events.addListener("engineReady", (e) => {
                    if (e.msg === lang) {
                        resolve(createTextHyphenator(lang));
                    }
                });
                // Only errors keyed with this language reject the promise.
                H.events.addListener("error", (e) => {
                    if (e.key === lang) {
                        reject(e.msg);
                    }
                });
            });
            result.set(lang, prom);
        }
        loadHyphenEngine(lang);
    });
    return result;
});

export default H;