diff --git a/wasm/README.md b/wasm/README.md index 728b0a364..a0b3d7820 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,95 +1,25 @@ # Using Bergamot Translator in JavaScript -Instructions in this document assume current-directory to be -[wasm](https://github.com/browsermt/bergamot-translator/tree/main/wasm) within -bergamot-translator source. - -The example file `bergamot.html` in the folder `test_page` demonstrates how to -use the bergamot translator in JavaScript via a ` - - - diff --git a/wasm/test_page/bergamot.js b/wasm/test_page/bergamot.js deleted file mode 100644 index 848fba177..000000000 --- a/wasm/test_page/bergamot.js +++ /dev/null @@ -1,54 +0,0 @@ -var worker; - -if (window.Worker) { - var worker = new Worker('worker.js'); - worker.postMessage(["load_module"]); -} - -const log = (message) => { - document.querySelector("#log").value += message + "\n"; -} - -document.querySelector("#translate").addEventListener("click", () => { - translateCall(); -}); - -document.querySelector("#from").addEventListener('keyup', function(event) { - if (event.keyCode === 13) { - translateCall(); - } -}); - -document.querySelector("#load").addEventListener("click", async() => { - document.querySelector("#load").disabled = true; - const lang = document.querySelector('input[name="modellang"]:checked').value; - const from = lang.substring(0, 2); - const to = lang.substring(2, 4); - let start = Date.now(); - worker.postMessage(["load_model", from, to]); - document.querySelector("#load").disabled = false; -}); - -const translateCall = () => { - const text = document.querySelector('#from').value; - const paragraphs = text.split("\n"); - document.querySelector("#load").disabled = true; - const lang = document.querySelector('input[name="modellang"]:checked').value; - const from = lang.substring(0, 2); - const to = lang.substring(2, 4); - worker.postMessage(["translate", from, to, paragraphs]); - document.querySelector("#load").disabled = false; -} - -worker.onmessage = function(e) { - console.debug(`Message received from worker`); - if (e.data[0] === 'translated_result') { - if (e.data[1]) { - document.querySelector('#to').value = e.data[1].join("\n"); - } - log(e.data[2]); - } - if ((e.data[0] === 'module_loaded') || (e.data[0] === 'model_loaded')) { - log(e.data[1]); - } -} \ No newline at end of file diff --git a/wasm/test_page/css/index.css b/wasm/test_page/css/index.css new file mode 100644 index 000000000..bbc5bf147 --- /dev/null +++ b/wasm/test_page/css/index.css @@ -0,0 +1,99 @@ +* { + box-sizing: border-box; +} + +html, +body { + height: 100%; + margin: 0; + font-size: 18px; + font-family: Optima, Helvetica, Arial; +} + +body { + padding: 1rem; +} + +.app { + padding: 1rem; + display: grid; + grid: "from swap to" 1fr "status status status" auto / 1fr auto 1fr; + grid-gap: 1rem; + overflow: hidden; + min-height: 400px; + max-width: 1024px; + margin: 1em auto; +} + +@media screen and (max-width: 640px) { + .app { + grid: "from from" auto "status swap" auto "to to" auto / 1fr; + } +} + +.panel { + display: grid; + grid-template-rows: auto 1fr; + grid-gap: 1rem; +} + +label { + padding: 0 0.5em; + display: flex; + align-items: center; +} + +.lang-select { + padding: 0.25rem 0.5rem; + margin-left: 1rem; + background: #f4f4f4; + font-size: 0.9rem; + border: 1px solid #ccc; + border-radius: 0.25rem; + cursor: pointer; +} + +.panel--from { + grid-area: from; +} + +.panel--to { + grid-area: to; +} + +.swap { + align-self: center; + grid-area: swap; + font-size: 1.1rem; +} + +#status { + grid-area: status; + text-align: center; + align-self: center; +} + +textarea { + padding: 1rem; + font-family: sans-serif; + font-size: 1rem; + resize: none; + border-radius: 2px; + border: 1px solid #ccc; +} + +button { + cursor: pointer; + border: 1px solid #88c; + border-radius: 4px; + background: #eef; + padding: 0; + padding: 0.25rem 0.5rem; +} +button:hover { + background: #cce; +} + +#output { + background-color: #f4f4f4; +} diff --git a/wasm/test_page/helper.js b/wasm/test_page/helper.js deleted file mode 100644 index bff116ced..000000000 --- a/wasm/test_page/helper.js +++ /dev/null @@ -1,40 +0,0 @@ -/* - * @author - Based of a file from Gist here: https://gist.github.com/1757658 - * - * @modified - Mike Newell - it was on Gist so I figure I can use it - * - * @Description - Added support for a few more mime types including the new - * .ogv, .webm, and .mp4 file types for HTML5 video. - * - */ - -/* -* @modified - Andre Natal - removed unused types for the purpose of this use -case -*/ - -Helper = { - - types: { - "wasm" : "application/wasm" - , "js" : "application/javascript" - , "html" : "text/html" - , "htm" : "text/html" - , "ico" : "image/vnd.microsoft.icon", - }, - - getMime: function(u) { - - var ext = this.getExt(u.pathname).replace('.', ''); - - return this.types[ext.toLowerCase()] || 'application/octet-stream'; - - }, - - getExt: function(path) { - var i = path.lastIndexOf('.'); - - return (i < 0) ? '' : path.substr(i); - } - -}; diff --git a/wasm/test_page/index.html b/wasm/test_page/index.html new file mode 100644 index 000000000..86eae4637 --- /dev/null +++ b/wasm/test_page/index.html @@ -0,0 +1,33 @@ + + + + Mozilla Translations + + + + + +
+
+ + +
+ +
+ + +
+ +
+ + + diff --git a/wasm/test_page/js/index.js b/wasm/test_page/js/index.js new file mode 100644 index 000000000..6b580415f --- /dev/null +++ b/wasm/test_page/js/index.js @@ -0,0 +1,101 @@ +let worker; +let modelRegistry; + +const $ = selector => document.querySelector(selector); +const status = message => ($("#status").innerText = message); + +const langFrom = $("#lang-from"); +const langTo = $("#lang-to"); + +const langs = [ + ["en", "English"], + ["it", "Italian"], + ["pt", "Portuguese"], + ["ru", "Russian"], + ["cs", "Czech"], + ["de", "German"], + ["es", "Spanish"], + ["et", "Estonian"], +]; + +if (window.Worker) { + worker = new Worker("js/worker.js"); + worker.postMessage(["import"]); +} + +document.querySelector("#input").addEventListener("keyup", function (event) { + translateCall(); +}); + +const translateCall = () => { + const text = document.querySelector("#input").value + " "; + if (!text.trim().length) return; + const paragraphs = text.split("\n"); + $("#output").setAttribute("disabled", true); + const lngFrom = langFrom.value; + const lngTo = langTo.value; + worker.postMessage(["translate", lngFrom, lngTo, paragraphs]); +}; + +worker.onmessage = function (e) { + if (e.data[0] === "translate_reply" && e.data[1]) { + document.querySelector("#output").value = e.data[1].join("\n\n"); + $("#output").removeAttribute("disabled"); + } else if (e.data[0] === "load_model_reply" && e.data[1]) { + status(e.data[1]); + translateCall(); + } else if (e.data[0] === "import_reply" && e.data[1]) { + modelRegistry = e.data[1]; + init(); + } +}; + +langs.forEach(([code, name]) => { + langFrom.innerHTML += ``; + langTo.innerHTML += ``; +}); + +const loadModel = () => { + const lngFrom = langFrom.value; + const lngTo = langTo.value; + if (lngFrom !== lngTo) { + status(`Installing model...`); + console.log(`Loading model '${lngFrom}${lngTo}'`); + worker.postMessage(["load_model", lngFrom, lngTo]); + } else { + const input = document.querySelector("#input").value; + document.querySelector("#output").value = input; + } +}; + +langFrom.addEventListener("change", e => { + loadModel(); +}); + +langTo.addEventListener("change", e => { + loadModel(); +}); + +$(".swap").addEventListener("click", e => { + [langFrom.value, langTo.value] = [langTo.value, langFrom.value]; + $("#input").value = $("#output").value; + loadModel(); +}); + +function init() { + // try to guess input language from user agent + let myLang = navigator.language; + if (myLang) { + myLang = myLang.split("-")[0]; + let langIndex = langs.findIndex(([code]) => code === myLang); + if (langIndex > -1) { + console.log("guessing input language is", myLang); + langFrom.value = myLang; + } + } + + // find first output lang that *isn't* input language + langTo.value = langs.find(([code]) => code !== langFrom.value)[0]; + // load this model + loadModel(); +} diff --git a/wasm/test_page/js/modelRegistry.js b/wasm/test_page/js/modelRegistry.js new file mode 100644 index 000000000..c8d6eda5e --- /dev/null +++ b/wasm/test_page/js/modelRegistry.js @@ -0,0 +1,328 @@ + +//const rootURL = "https://storage.googleapis.com/bergamot-models-sandbox/0.2.10"; +const rootURL = "../models"; + +const modelRegistry = { + enit: { + vocab: { + name: "vocab.enit.spm", + size: 814128, + estimatedCompressedSize: 405338, + expectedSha256Hash: + "de8cbeb79e0139304bfa47e8559f2447016bf9906225a97d3df1baed4de8f3a3", + }, + lex: { + name: "lex.50.50.enit.s2t.bin", + size: 4489920, + estimatedCompressedSize: 2409986, + expectedSha256Hash: + "bb1fad3b3f6a13ebce1698cf7f39ca736c4dea4525f3dab5e1a78436f07445e6", + }, + model: { + name: "model.enit.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 13283223, + expectedSha256Hash: + "a5ce3723f62ead92a0e0373b6df0ad8e3e6d22963adb1333984206e33b8b6c61", + }, + }, + enpt: { + vocab: { + name: "vocab.enpt.spm", + size: 812781, + estimatedCompressedSize: 406524, + expectedSha256Hash: + "633a3d782c79f7d5e4b94ab96848f47c2fdf8ba82dd99efd1742b8a696bbd0cc", + }, + lex: { + name: "lex.50.50.enpt.s2t.bin", + size: 4472528, + estimatedCompressedSize: 2411984, + expectedSha256Hash: + "1e96599123d275afa37353dfe84677a4070f013494fbdc9c52a28445cc9bc38d", + }, + model: { + name: "model.enpt.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 13429592, + expectedSha256Hash: + "d968735704c75e33c2e183b9241f14c0b2a560d01d88a2728e5c0119a4d7fb22", + }, + }, + enru: { + vocab: { + name: "vocab.enru.spm", + size: 937157, + estimatedCompressedSize: 435776, + expectedSha256Hash: + "feca2d44f01b946c85faba3b15b5eb53344bec84cd14a1a4d4a82ddd774c5edd", + }, + lex: { + name: "lex.50.50.enru.s2t.bin", + size: 3049096, + estimatedCompressedSize: 1579779, + expectedSha256Hash: + "7bd3e2c0a72286fe1f3da65c56c49a7cd77efa5f1d1a444e2a9e769480b96ff3", + }, + model: { + name: "model.enru.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 12853987, + expectedSha256Hash: + "4a45186a93b8a2dd9301c66a3b3dad580b1bcfa74aadda583ca383f9fe0dea93", + }, + }, + iten: { + vocab: { + name: "vocab.iten.spm", + size: 814151, + estimatedCompressedSize: 405416, + expectedSha256Hash: + "22d5ce6973be5360a921103acbe984a9bfca952a1f6c55c9cb5ef7de4fd58266", + }, + lex: { + name: "lex.50.50.iten.s2t.bin", + size: 5238420, + estimatedCompressedSize: 2860178, + expectedSha256Hash: + "357d362373022b029ee9965975a133e6f36fdb0fed749202ff578365cf0111f8", + }, + model: { + name: "model.iten.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 13423308, + expectedSha256Hash: + "1fae546faeb9046f80b1b7e940b37b660974ce72902778181d6cd1c30b717f35", + }, + }, + pten: { + vocab: { + name: "vocab.pten.spm", + size: 812889, + estimatedCompressedSize: 406730, + expectedSha256Hash: + "8389979e3c965688b07aeb712a7e44406e5dcdb2b84087229d26fcc71448c4ed", + }, + lex: { + name: "lex.50.50.pten.s2t.bin", + size: 5001420, + estimatedCompressedSize: 2733800, + expectedSha256Hash: + "212ed0ae44a6f920cd6d17ca02f0a523ba6c4b0ef5078ae310c20bc4c51484c5", + }, + model: { + name: "model.pten.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 13584764, + expectedSha256Hash: + "6c3b7af01772022a19712410c63342ba581468c2f1aac34d7488409c4043e697", + }, + }, + ruen: { + vocab: { + name: "vocab.ruen.spm", + size: 936576, + estimatedCompressedSize: 435801, + expectedSha256Hash: + "aaf9a325c0a988c507d0312cb6ba1a02bac7a370bcd879aedee626a40bfbda78", + }, + lex: { + name: "lex.50.50.ruen.s2t.bin", + size: 5090836, + estimatedCompressedSize: 2684919, + expectedSha256Hash: + "e6667e22f5f86be4872e3768b7184727f5dd8c9f2ccfb0639baabcb1176f5d11", + }, + model: { + name: "model.ruen.intgemm.alphas.bin", + size: 17140836, + estimatedCompressedSize: 13108893, + expectedSha256Hash: + "3b6a0305e3d232fadd54f5a765365b7b96ad6d8f2e818cba594b02fbd8fadb3d", + }, + }, + csen: { + vocab: { + name: "vocab.csen.spm", + size: 769763, + estimatedCompressedSize: 366392, + expectedSha256Hash: + "f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3", + }, + lex: { + name: "lex.50.50.csen.s2t.bin", + size: 4535788, + estimatedCompressedSize: 2418488, + expectedSha256Hash: + "8228a3c3f7887759a62b7d7c674a7bef9b70161913f9b0939ab58f71186835c2", + }, + model: { + name: "model.csen.intgemm.alphas.bin", + size: 17140756, + estimatedCompressedSize: 13045032, + expectedSha256Hash: + "5b16661e2864dc50b2f4091a16bdd4ec8d8283e04271e602159ba348df5d6e2d", + }, + }, + deen: { + vocab: { + name: "vocab.deen.spm", + size: 784269, + estimatedCompressedSize: 410738, + expectedSha256Hash: + "417668f2ed297970febafb5b079a9d5ebc4ed0b3550ac8386d67a90473a09bd7", + }, + lex: { + name: "lex.50.50.deen.s2t.bin", + size: 5047568, + estimatedCompressedSize: 2657472, + expectedSha256Hash: + "2f7c0f7bbce97ae5b52454074a892ba7b7610fb98e3c5d341e4ca79f0850c4de", + }, + model: { + name: "model.deen.intgemm.alphas.bin", + size: 17140837, + estimatedCompressedSize: 13091214, + expectedSha256Hash: + "dda44d87ab0d8ad3b3871122fd3ee385f37878183a8b4ec139cd909531ec5009", + }, + }, + encs: { + vocab: { + name: "vocab.csen.spm", + size: 769763, + estimatedCompressedSize: 366392, + expectedSha256Hash: + "f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3", + }, + lex: { + name: "lex.50.50.encs.s2t.bin", + size: 3556124, + estimatedCompressedSize: 1913246, + expectedSha256Hash: + "e19c77231bf977988e31ff8db15fe79966b5170564bd3e10613f239e7f461d97", + }, + model: { + name: "model.encs.intgemm.alphas.bin", + size: 17140756, + estimatedCompressedSize: 12630325, + expectedSha256Hash: + "9a2fe0588bd972accfc801e2f31c945de0557804a91666ae5ab43b94fb74ac4b", + }, + }, + ende: { + vocab: { + name: "vocab.deen.spm", + size: 797501, + estimatedCompressedSize: 412505, + expectedSha256Hash: + "bc8f8229933d8294c727f3eab12f6f064e7082b929f2d29494c8a1e619ba174c", + }, + lex: { + name: "lex.50.50.ende.s2t.bin", + size: 3062492, + estimatedCompressedSize: 1575385, + expectedSha256Hash: + "764797d075f0642c0b079cce6547348d65fe4e92ac69fa6a8605cd8b53dacb3f", + }, + model: { + name: "model.ende.intgemm.alphas.bin", + size: 17140498, + estimatedCompressedSize: 13207068, + expectedSha256Hash: + "f0946515c6645304f0706fa66a051c3b7b7c507f12d0c850f276c18165a10c14", + }, + }, + enes: { + vocab: { + name: "vocab.esen.spm", + size: 825463, + estimatedCompressedSize: 414566, + expectedSha256Hash: + "909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845", + }, + lex: { + name: "lex.50.50.enes.s2t.bin", + size: 3347104, + estimatedCompressedSize: 1720700, + expectedSha256Hash: + "3a113d713dec3cf1d12bba5b138ae616e28bba4bbc7fe7fd39ba145e26b86d7f", + }, + model: { + name: "model.enes.intgemm.alphas.bin", + size: 17140755, + estimatedCompressedSize: 12602853, + expectedSha256Hash: + "fa7460037a3163e03fe1d23602f964bff2331da6ee813637e092ddf37156ef53", + }, + }, + enet: { + vocab: { + name: "vocab.eten.spm", + size: 828426, + estimatedCompressedSize: 416995, + expectedSha256Hash: + "e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d", + }, + lex: { + name: "lex.50.50.enet.s2t.bin", + size: 2700780, + estimatedCompressedSize: 1336443, + expectedSha256Hash: + "3d1b40ff43ebef82cf98d416a88a1ea19eb325a85785eef102f59878a63a829d", + }, + model: { + name: "model.enet.intgemm.alphas.bin", + size: 17140754, + estimatedCompressedSize: 12543318, + expectedSha256Hash: + "a28874a8b702a519a14dc71bcee726a5cb4b539eeaada2d06492f751469a1fd6", + }, + }, + esen: { + vocab: { + name: "vocab.esen.spm", + size: 825463, + estimatedCompressedSize: 414566, + expectedSha256Hash: + "909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845", + }, + lex: { + name: "lex.50.50.esen.s2t.bin", + size: 3860888, + estimatedCompressedSize: 1978538, + expectedSha256Hash: + "f11a2c23ef85ab1fee1c412b908d69bc20d66fd59faa8f7da5a5f0347eddf969", + }, + model: { + name: "model.esen.intgemm.alphas.bin", + size: 17140755, + estimatedCompressedSize: 13215960, + expectedSha256Hash: + "4b6b7f451094aaa447d012658af158ffc708fc8842dde2f871a58404f5457fe0", + }, + }, + eten: { + vocab: { + name: "vocab.eten.spm", + size: 828426, + estimatedCompressedSize: 416995, + expectedSha256Hash: + "e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d", + }, + lex: { + name: "lex.50.50.eten.s2t.bin", + size: 3974944, + estimatedCompressedSize: 1920655, + expectedSha256Hash: + "6992bedc590e60e610a28129c80746fe5f33144a4520e2c5508d87db14ca54f8", + }, + model: { + name: "model.eten.intgemm.alphas.bin", + size: 17140754, + estimatedCompressedSize: 12222624, + expectedSha256Hash: + "aac98a2371e216ee2d4843cbe896c617f6687501e17225ac83482eba52fd0028", + }, + }, +}; \ No newline at end of file diff --git a/wasm/test_page/js/worker.js b/wasm/test_page/js/worker.js new file mode 100644 index 000000000..1cf3a1461 --- /dev/null +++ b/wasm/test_page/js/worker.js @@ -0,0 +1,298 @@ +// All variables specific to translation service +var translationService, responseOptions, input = undefined; +// A map of language-pair to TranslationModel object +var languagePairToTranslationModels = new Map(); + +const BERGAMOT_TRANSLATOR_MODULE = "bergamot-translator-worker.js"; +const MODEL_REGISTRY = "modelRegistry.js"; + +const encoder = new TextEncoder(); // string to utf-8 converter +const decoder = new TextDecoder(); // utf-8 to string converter + +const start = Date.now(); +let moduleLoadStart; +var Module = { + preRun: [function() { + log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`); + moduleLoadStart = Date.now(); + }], + onRuntimeInitialized: function() { + log(`Wasm Runtime initialized Successfully (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`); + importScripts(MODEL_REGISTRY); + postMessage([`import_reply`, modelRegistry]); + } +}; + +const log = (message) => { + console.debug(message); +} + +onmessage = async function(e) { + const command = e.data[0]; + log(`Message '${command}' received from main script`); + let result = ""; + if (command === 'import') { + importScripts(BERGAMOT_TRANSLATOR_MODULE); + } else if (command === 'load_model') { + let start = Date.now(); + let from = e.data[1]; + let to = e.data[2]; + try { + await constructTranslationService(); + await constructTranslationModel(from, to); + log(`Model '${from}${to}' successfully constructed. Time taken: ${(Date.now() - start) / 1000} secs`); + result = "Model successfully loaded"; + } catch (error) { + log(`Model '${from}${to}' construction failed: '${error.message}'`); + result = "Model loading failed"; + } + log(`'${command}' command done, Posting message back to main script`); + postMessage([`${command}_reply`, result]); + } else if (command === 'translate') { + const from = e.data[1]; + const to = e.data[2]; + const inputParagraphs = e.data[3]; + let inputWordCount = 0; + inputParagraphs.forEach(sentence => { + inputWordCount += sentence.trim().split(" ").filter(word => word.trim() !== "").length; + }) + let start = Date.now(); + try { + result = translate(from, to, inputParagraphs); + const secs = (Date.now() - start) / 1000; + log(`Translation '${from}${to}' Successful. Speed: ${Math.round(inputWordCount / secs)} WPS (${inputWordCount} words in ${secs} secs)`); + } catch (error) { + log(`Error: ${error.message}`); + } + log(`'${command}' command done, Posting message back to main script`); + postMessage([`${command}_reply`, result]); + } +} + +// Instantiates the Translation Service +const constructTranslationService = async () => { + if (!translationService) { + var translationServiceConfig = {}; + log(`Creating Translation Service with config: ${translationServiceConfig}`); + translationService = new Module.BlockingService(translationServiceConfig); + log(`Translation Service created successfully`); + } +} + +// Constructs a translation model object for the source and target language pair +const constructTranslationModel = async (from, to) => { + // Delete all previously constructed translation models and clear the map + languagePairToTranslationModels.forEach((value, key) => { + log(`Destructing model '${key}'`); + value.delete(); + }); + languagePairToTranslationModels.clear(); + + // If none of the languages is English then construct multiple models with + // English as a pivot language. + if (from !== 'en' && to !== 'en') { + log(`Constructing model '${from}${to}' via pivoting: '${from}en' and 'en${to}'`); + await Promise.all([_constructTranslationModelInvolvingEnglish(from, 'en'), + _constructTranslationModelInvolvingEnglish('en', to)]); + } + else { + log(`Constructing model '${from}${to}'`); + await _constructTranslationModelInvolvingEnglish(from, to); + } +} + +// Translates text from source language to target language. +const translate = (from, to, paragraphs) => { + // If none of the languages is English then perform translation with + // English as a pivot language. + if (from !== 'en' && to !== 'en') { + log(`Translating '${from}${to}' via pivoting: '${from}en' -> 'en${to}'`); + let translatedParagraphsInEnglish = _translateInvolvingEnglish(from, 'en', paragraphs); + return _translateInvolvingEnglish('en', to, translatedParagraphsInEnglish); + } + else { + log(`Translating '${from}${to}'`); + return _translateInvolvingEnglish(from, to, paragraphs); + } +} + +// Downloads file from a url and returns the array buffer +const _downloadAsArrayBuffer = async(url) => { + const response = await fetch(url); + if (!response.ok) { + throw Error(`Downloading ${url} failed: HTTP ${response.status} - ${response.statusText}`); + } + return response.arrayBuffer(); +} + +// Constructs and initializes the AlignedMemory from the array buffer and alignment size +const _prepareAlignedMemoryFromBuffer = async (buffer, alignmentSize) => { + var byteArray = new Int8Array(buffer); + log(`Constructing Aligned memory. Size: ${byteArray.byteLength} bytes, Alignment: ${alignmentSize}`); + var alignedMemory = new Module.AlignedMemory(byteArray.byteLength, alignmentSize); + log(`Aligned memory construction done`); + const alignedByteArrayView = alignedMemory.getByteArrayView(); + alignedByteArrayView.set(byteArray); + log(`Aligned memory initialized`); + return alignedMemory; +} + +const _constructTranslationModelInvolvingEnglish = async (from, to) => { + const languagePair = `${from}${to}`; + + /*Set the Model Configuration as YAML formatted string. + For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ + Vocab files are re-used in both translation directions + const vocabLanguagePair = from === "en" ? `${to}${from}` : languagePair; + const modelConfig = `models: + - /${languagePair}/model.${languagePair}.intgemm.alphas.bin + vocabs: + - /${languagePair}/vocab.${vocabLanguagePair}.spm + - /${languagePair}/vocab.${vocabLanguagePair}.spm + beam-size: 1 + normalize: 1.0 + word-penalty: 0 + max-length-break: 128 + mini-batch-words: 1024 + workspace: 128 + max-length-factor: 2.0 + skip-cost: true + cpu-threads: 0 + quiet: true + quiet-translation: true + shortlist: + - /${languagePair}/lex.${languagePair}.s2t + - 50 + - 50 + `; + */ + + // TODO: gemm-precision: int8shiftAlphaAll (for the models that support this) + // DONOT CHANGE THE SPACES BETWEEN EACH ENTRY OF CONFIG + const modelConfig = `beam-size: 1 +normalize: 1.0 +word-penalty: 0 +max-length-break: 128 +mini-batch-words: 1024 +workspace: 128 +max-length-factor: 2.0 +skip-cost: true +cpu-threads: 0 +quiet: true +quiet-translation: true +gemm-precision: int8shiftAll +`; + + const modelFile = `${rootURL}/${languagePair}/${modelRegistry[languagePair]["model"].name}`; + const shortlistFile = `${rootURL}/${languagePair}/${modelRegistry[languagePair]["lex"].name}`; + const vocabFiles = [`${rootURL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`, + `${rootURL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`]; + + const uniqueVocabFiles = new Set(vocabFiles); + log(`modelFile: ${modelFile}\nshortlistFile: ${shortlistFile}\nNo. of unique vocabs: ${uniqueVocabFiles.size}`); + uniqueVocabFiles.forEach(item => log(`unique vocabFile: ${item}`)); + + // Download the files as buffers from the given urls + let start = Date.now(); + const downloadedBuffers = await Promise.all([_downloadAsArrayBuffer(modelFile), _downloadAsArrayBuffer(shortlistFile)]); + const modelBuffer = downloadedBuffers[0]; + const shortListBuffer = downloadedBuffers[1]; + + const downloadedVocabBuffers = []; + for (let item of uniqueVocabFiles.values()) { + downloadedVocabBuffers.push(await _downloadAsArrayBuffer(item)); + } + log(`Total Download time for all files of '${languagePair}': ${(Date.now() - start) / 1000} secs`); + + // Construct AlignedMemory objects with downloaded buffers + let constructedAlignedMemories = await Promise.all([_prepareAlignedMemoryFromBuffer(modelBuffer, 256), + _prepareAlignedMemoryFromBuffer(shortListBuffer, 64)]); + let alignedModelMemory = constructedAlignedMemories[0]; + let alignedShortlistMemory = constructedAlignedMemories[1]; + let alignedVocabsMemoryList = new Module.AlignedMemoryList; + for(let item of downloadedVocabBuffers) { + let alignedMemory = await _prepareAlignedMemoryFromBuffer(item, 64); + alignedVocabsMemoryList.push_back(alignedMemory); + } + for (let vocabs=0; vocabs < alignedVocabsMemoryList.size(); vocabs++) { + log(`Aligned vocab memory${vocabs+1} size: ${alignedVocabsMemoryList.get(vocabs).size()}`); + } + log(`Aligned model memory size: ${alignedModelMemory.size()}`); + log(`Aligned shortlist memory size: ${alignedShortlistMemory.size()}`); + + log(`Translation Model config: ${modelConfig}`); + var translationModel = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList); + languagePairToTranslationModels.set(languagePair, translationModel); +} + +const _translateInvolvingEnglish = (from, to, paragraphs) => { + const languagePair = `${from}${to}`; + if (!languagePairToTranslationModels.has(languagePair)) { + throw Error(`Please load translation model '${languagePair}' before translating`); + } + translationModel = languagePairToTranslationModels.get(languagePair); + + // Instantiate the arguments of translate() API i.e. ResponseOptions and input (vector) + var responseOptions = new Module.ResponseOptions(); + let input = new Module.VectorString; + + // Initialize the input + paragraphs.forEach(paragraph => { + // prevent empty paragraph - it breaks the translation + if (paragraph.trim() === "") { + return; + } + input.push_back(paragraph.trim()) + }) + + // Access input (just for debugging) + log(`Input size: ${input.size()}`); + + // Translate the input, which is a vector; the result is a vector + let result = translationService.translate(translationModel, input, responseOptions); + + const translatedParagraphs = []; + const translatedSentencesOfParagraphs = []; + const sourceSentencesOfParagraphs = []; + for (let i = 0; i < result.size(); i++) { + translatedParagraphs.push(result.get(i).getTranslatedText()); + translatedSentencesOfParagraphs.push(_getAllTranslatedSentencesOfParagraph(result.get(i))); + sourceSentencesOfParagraphs.push(_getAllSourceSentencesOfParagraph(result.get(i))); + } + + responseOptions.delete(); + input.delete(); + return translatedParagraphs; +} + +// Extracts all the translated sentences from the Response and returns them. +const _getAllTranslatedSentencesOfParagraph = (response) => { + const sentences = []; + const text = response.getTranslatedText(); + for (let sentenceIndex = 0; sentenceIndex < response.size(); sentenceIndex++) { + const utf8SentenceByteRange = response.getTranslatedSentence(sentenceIndex); + sentences.push(_getSentenceFromByteRange(text, utf8SentenceByteRange)); + } + return sentences; +} + +// Extracts all the source sentences from the Response and returns them. +const _getAllSourceSentencesOfParagraph = (response) => { + const sentences = []; + const text = response.getOriginalText(); + for (let sentenceIndex = 0; sentenceIndex < response.size(); sentenceIndex++) { + const utf8SentenceByteRange = response.getSourceSentence(sentenceIndex); + sentences.push(_getSentenceFromByteRange(text, utf8SentenceByteRange)); + } + return sentences; +} + +/* + * Returns a substring of text (a string). The substring is represented by + * byteRange (begin and end endices) within the utf-8 encoded version of the text. + */ +const _getSentenceFromByteRange = (text, byteRange) => { + const utf8BytesView = encoder.encode(text); + const utf8SentenceBytes = utf8BytesView.subarray(byteRange.begin, byteRange.end); + return decoder.decode(utf8SentenceBytes); +} diff --git a/wasm/test_page/package-lock.json b/wasm/test_page/package-lock.json index ae4cb9dd6..065c92de8 100644 --- a/wasm/test_page/package-lock.json +++ b/wasm/test_page/package-lock.json @@ -1,6 +1,519 @@ { + "name": "test_page", + "lockfileVersion": 2, "requires": true, - "lockfileVersion": 1, + "packages": { + "": { + "dependencies": { + "cors": "^2.8.5", + "express": "^4.17.1", + "nocache": "^2.1.0" + } + }, + "node_modules/accepts": { + "version": "1.3.7", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", + "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", + "dependencies": { + "mime-types": "~2.1.24", + "negotiator": "0.6.2" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" + }, + "node_modules/body-parser": { + "version": "1.19.0", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", + "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", + "dependencies": { + "bytes": "3.1.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.2", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "on-finished": "~2.3.0", + "qs": "6.7.0", + "raw-body": "2.4.0", + "type-is": "~1.6.17" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/bytes": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", + "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/content-disposition": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", + "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", + "dependencies": { + "safe-buffer": "5.1.2" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", + "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" + }, + "node_modules/cors": { + "version": "2.8.5", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" + }, + "node_modules/encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express": { + "version": "4.17.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", + "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", + "dependencies": { + "accepts": "~1.3.7", + "array-flatten": "1.1.1", + "body-parser": "1.19.0", + "content-disposition": "0.5.3", + "content-type": "~1.0.4", + "cookie": "0.4.0", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.2", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.1.2", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.5", + "qs": "6.7.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.1.2", + "send": "0.17.1", + "serve-static": "1.14.1", + "setprototypeof": "1.1.1", + "statuses": "~1.5.0", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + } + }, + "node_modules/finalhandler": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", + "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "statuses": "~1.5.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/http-errors": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", + "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", + "dependencies": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.1", + "statuses": ">= 1.5.0 < 2", + "toidentifier": "1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" + }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.45.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz", + "integrity": "sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.28", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz", + "integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==", + "dependencies": { + "mime-db": "1.45.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "node_modules/negotiator": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", + "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/nocache": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz", + "integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q==", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" + }, + "node_modules/proxy-addr": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz", + "integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==", + "dependencies": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/qs": { + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", + "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", + "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", + "dependencies": { + "bytes": "3.1.0", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/send": { + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", + "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", + "dependencies": { + "debug": "2.6.9", + "depd": "~1.1.2", + "destroy": "~1.0.4", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.7.2", + "mime": "1.6.0", + "ms": "2.1.1", + "on-finished": "~2.3.0", + "range-parser": "~1.2.1", + "statuses": "~1.5.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/ms": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", + "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" + }, + "node_modules/serve-static": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", + "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", + "dependencies": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.17.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", + "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==" + }, + "node_modules/statuses": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", + "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/toidentifier": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", + "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=", + "engines": { + "node": ">= 0.8" + } + } + }, "dependencies": { "accepts": { "version": "1.3.7", diff --git a/wasm/test_page/start_server.sh b/wasm/test_page/start_server.sh index 911364665..8cb90071c 100644 --- a/wasm/test_page/start_server.sh +++ b/wasm/test_page/start_server.sh @@ -19,13 +19,13 @@ if [ ! -e "$1" ]; then exit fi -WASM_ARTIFACTS="$1/bergamot-translator-worker.*" +WASM_ARTIFACTS="$1/bergamot-translator-worker.js $1/bergamot-translator-worker.wasm" for i in $WASM_ARTIFACTS; do [ -f "$i" ] || breaks - cp $i . + cp $i js/. echo "Copied \"$i\"" done npm install echo "Start httpserver" -node bergamot-httpserver.js \ No newline at end of file +node bergamot-httpserver.js 80 1 0 \ No newline at end of file diff --git a/wasm/test_page/worker.js b/wasm/test_page/worker.js deleted file mode 100644 index 8b53a271a..000000000 --- a/wasm/test_page/worker.js +++ /dev/null @@ -1,267 +0,0 @@ -var translationService, responseOptions, input = undefined; -// A map of language-pair to TranslationModel object -var translationModels = new Map(); -const BERGAMOT_TRANSLATOR_MODULE = "bergamot-translator-worker.js"; - -const encoder = new TextEncoder(); // string to utf-8 converter -const decoder = new TextDecoder(); // utf-8 to string converter - -const start = Date.now(); -let moduleLoadStart; -var Module = { - preRun: [function() { - log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`); - moduleLoadStart = Date.now(); - }], - onRuntimeInitialized: function() { - log(`Wasm Runtime initialized (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`); - } -}; - -const log = (message) => { - console.debug(message); -} - -onmessage = async function(e) { - let command = e.data[0]; - log(`Message '${command}' received from main script`); - let result = ""; - if (command === 'load_module') { - importScripts(BERGAMOT_TRANSLATOR_MODULE); - result = `Translator wasm module successfully loaded`; - log(result); - log('Posting message back to main script'); - postMessage(['module_loaded', result]); - } - else if (command === 'load_model') { - let start = Date.now(); - try { - await constructTranslationService(); - await constructTranslationModel(e.data[1], e.data[2]); - result = `translation model '${e.data[1]}${e.data[2]}' successfully loaded; took ${(Date.now() - start) / 1000} secs`; - } catch (error) { - result = `translation model '${e.data[1]}${e.data[2]}' loading failed: '${error.message}'`; - } - log(result); - log('Posting message back to main script'); - postMessage(['model_loaded', result]); - } - else if (command === 'translate') { - const from = e.data[1]; - const to = e.data[2]; - const inputParagraphs = e.data[3]; - let inputWordCount = 0; - inputParagraphs.forEach(sentence => { - inputWordCount += sentence.trim().split(" ").filter(word => word.trim() !== "").length; - }) - - let start = Date.now(); - var translatedParagraphs; - try { - translatedParagraphs = translate(from, to, inputParagraphs); - const secs = (Date.now() - start) / 1000; - result = `Translation '${from}${to}' Successful. Speed: ${Math.round(inputWordCount / secs)} Words per second (${inputWordCount} words in ${secs} secs)`; - } catch (error) { - result = `Error: ${error.message}`; - } - log(result); - log('Posting message back to main script'); - postMessage(['translated_result', translatedParagraphs, result]); - } -} - -// This function downloads file from a url and returns the array buffer -const downloadAsArrayBuffer = async(url) => { - const response = await fetch(url); - if (!response.ok) { - throw Error(`Downloading ${url} failed: HTTP ${response.status} - ${response.statusText}`); - } - return response.arrayBuffer(); -} - -// This function constructs and initializes the AlignedMemory from the array buffer and alignment size -const prepareAlignedMemoryFromBuffer = async (buffer, alignmentSize) => { - var byteArray = new Int8Array(buffer); - log(`Constructing Aligned memory with size: ${byteArray.byteLength} bytes with alignment: ${alignmentSize}`); - var alignedMemory = new Module.AlignedMemory(byteArray.byteLength, alignmentSize); - log(`Aligned memory construction done`); - const alignedByteArrayView = alignedMemory.getByteArrayView(); - alignedByteArrayView.set(byteArray); - log(`Aligned memory initialized`); - return alignedMemory; -} - -// Instantiate the Translation Service -const constructTranslationService = async () => { - if (!translationService) { - var translationServiceConfig = {}; - log(`Creating Translation Service with config: ${translationServiceConfig}`); - translationService = new Module.BlockingService(translationServiceConfig); - log(`Translation Service created successfully`); - } -} - -const constructTranslationModel = async (from, to) => { - const languagePair = `${from}${to}`; - if (translationModels.has(languagePair)) { - var oldModel = translationModels.get(languagePair); - // Destruct the old TranslationModel explicitly and Remove its entry from the map - oldModel.delete(); - translationModels.delete(languagePair); - } - - // Vocab files are re-used in both translation directions - const vocabLanguagePair = from === "en" ? `${to}${from}` : languagePair; - - // Set the Model Configuration as YAML formatted string. - // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ - /*const modelConfig = `models: - - /${languagePair}/model.${languagePair}.intgemm.alphas.bin - vocabs: - - /${languagePair}/vocab.${vocabLanguagePair}.spm - - /${languagePair}/vocab.${vocabLanguagePair}.spm - beam-size: 1 - normalize: 1.0 - word-penalty: 0 - max-length-break: 128 - mini-batch-words: 1024 - workspace: 128 - max-length-factor: 2.0 - skip-cost: true - cpu-threads: 0 - quiet: true - quiet-translation: true - shortlist: - - /${languagePair}/lex.${languagePair}.s2t - - 50 - - 50 - `; - */ - - // TODO: gemm-precision: int8shiftAlphaAll (for the models that support this) - // DONOT CHANGE THE SPACES BETWEEN EACH ENTRY OF CONFIG - const modelConfig = `beam-size: 1 -normalize: 1.0 -word-penalty: 0 -max-length-break: 128 -mini-batch-words: 1024 -workspace: 128 -max-length-factor: 2.0 -skip-cost: true -cpu-threads: 0 -quiet: true -quiet-translation: true -gemm-precision: int8shift -`; - - const modelFile = `models/${languagePair}/model.${languagePair}.intgemm.alphas.bin`; - const shortlistFile = `models/${languagePair}/lex.50.50.${languagePair}.s2t.bin`; - const vocabFiles = [`models/${languagePair}/vocab.${vocabLanguagePair}.spm`, - `models/${languagePair}/vocab.${vocabLanguagePair}.spm`]; - - const uniqueVocabFiles = new Set(vocabFiles); - log(`modelFile: ${modelFile}\nshortlistFile: ${shortlistFile}\nNo. of unique vocabs: ${uniqueVocabFiles.size}`); - uniqueVocabFiles.forEach(item => log(`unique vocabFile: ${item}`)); - - // Download the files as buffers from the given urls - let start = Date.now(); - const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]); - const modelBuffer = downloadedBuffers[0]; - const shortListBuffer = downloadedBuffers[1]; - - const downloadedVocabBuffers = []; - for (let item of uniqueVocabFiles.values()) { - downloadedVocabBuffers.push(await downloadAsArrayBuffer(item)); - } - log(`All files for ${languagePair} language pair took ${(Date.now() - start) / 1000} secs to download`); - - // Construct AlignedMemory objects with downloaded buffers - let constructedAlignedMemories = await Promise.all([prepareAlignedMemoryFromBuffer(modelBuffer, 256), - prepareAlignedMemoryFromBuffer(shortListBuffer, 64)]); - let alignedModelMemory = constructedAlignedMemories[0]; - let alignedShortlistMemory = constructedAlignedMemories[1]; - let alignedVocabsMemoryList = new Module.AlignedMemoryList; - for(let item of downloadedVocabBuffers) { - let alignedMemory = await prepareAlignedMemoryFromBuffer(item, 64); - alignedVocabsMemoryList.push_back(alignedMemory); - } - log(`Aligned vocab memories: ${alignedVocabsMemoryList.get(0).size()}`); - log(`Aligned model memory: ${alignedModelMemory.size()}`); - log(`Aligned shortlist memory: ${alignedShortlistMemory.size()}`); - - log(`Creating Translation Model with config: ${modelConfig}`); - var translationModel = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList); - translationModels.set(languagePair, translationModel); -} - -const translate = (from, to, paragraphs) => { - const languagePair = `${from}${to}`; - if (!translationModels.has(languagePair)) { - throw Error(`Please load translation model '${languagePair}' before translating`); - } - translationModel = translationModels.get(languagePair); - - // Instantiate the arguments of translate() API i.e. ResponseOptions and input (vector) - var responseOptions = new Module.ResponseOptions(); - let input = new Module.VectorString; - - // Initialize the input - paragraphs.forEach(paragraph => { - // prevent empty paragraph - it breaks the translation - if (paragraph.trim() === "") { - return; - } - input.push_back(paragraph.trim()) - }) - // Access input (just for debugging) - log(`Input size: ${input.size()}`); - - // Translate the input, which is a vector; the result is a vector - let result = translationService.translate(translationModel, input, responseOptions); - - const translatedParagraphs = []; - const translatedSentencesOfParagraphs = []; - const sourceSentencesOfParagraphs = []; - for (let i = 0; i < result.size(); i++) { - translatedParagraphs.push(result.get(i).getTranslatedText()); - translatedSentencesOfParagraphs.push(getAllTranslatedSentencesOfParagraph(result.get(i))); - sourceSentencesOfParagraphs.push(getAllSourceSentencesOfParagraph(result.get(i))); - } - log({ translatedParagraphs }); - log({ translatedSentencesOfParagraphs }); - log({ sourceSentencesOfParagraphs }); - - responseOptions.delete(); - input.delete(); - return translatedParagraphs; -} - -// This function extracts all the translated sentences from the Response and returns them. -const getAllTranslatedSentencesOfParagraph = (response) => { - const sentences = []; - const text = response.getTranslatedText(); - for (let sentenceIndex = 0; sentenceIndex < response.size(); sentenceIndex++) { - const utf8SentenceByteRange = response.getTranslatedSentence(sentenceIndex); - sentences.push(_getSentenceFromByteRange(text, utf8SentenceByteRange)); - } - return sentences; -} - -// This function extracts all the source sentences from the Response and returns them. -const getAllSourceSentencesOfParagraph = (response) => { - const sentences = []; - const text = response.getOriginalText(); - for (let sentenceIndex = 0; sentenceIndex < response.size(); sentenceIndex++) { - const utf8SentenceByteRange = response.getSourceSentence(sentenceIndex); - sentences.push(_getSentenceFromByteRange(text, utf8SentenceByteRange)); - } - return sentences; -} - -// This function returns a substring of text (a string). The substring is represented by -// byteRange (begin and end endices) within the utf-8 encoded version of the text. -const _getSentenceFromByteRange = (text, byteRange) => { - const utf8BytesView = encoder.encode(text); - const utf8SentenceBytes = utf8BytesView.subarray(byteRange.begin, byteRange.end); - return decoder.decode(utf8SentenceBytes); -}