Skip to content
This repository has been archived by the owner on Sep 4, 2023. It is now read-only.

Integrate Basic QE feature #144

Merged
merged 11 commits into from
Mar 11, 2022
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
function loadEmscriptenGlueCode(Module) {

var BERGAMOT_VERSION_FULL = "v0.4.1+9eb2437";
var BERGAMOT_VERSION_FULL = "v0.4.2+ab7f84f";

var Module = typeof Module !== "undefined" ? Module : {};

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/* eslint-disable */
// This file is autogenerated using scripts/update-begamot-version.sh
const BERGAMOT_VERSION_FULL = "v0.3.1+793d132";
const BERGAMOT_VERSION_FULL = "v0.4.2+ab7f84f";
83 changes: 58 additions & 25 deletions extension/controller/translation/translationWorker.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class TranslationHelper {
};
}

async loadTranslationEngine(sourceLanguage, targetLanguage, withOutboundTranslation) {
async loadTranslationEngine(sourceLanguage, targetLanguage, withOutboundTranslation, withQualityEstimation) {
postMessage([
"updateProgress",
"loadingTranslationEngine"
Expand Down Expand Up @@ -66,7 +66,7 @@ class TranslationHelper {
* initialized, we then load the language models
*/
console.log(`Wasm Runtime initialized Successfully (preRun -> onRuntimeInitialized) in ${(Date.now() - this.wasmModuleStartTimestamp) / 1000} secs`);
this.loadLanguageModel(sourceLanguage, targetLanguage, withOutboundTranslation);
this.loadLanguageModel(sourceLanguage, targetLanguage, withOutboundTranslation, withQualityEstimation);
}.bind(this),
wasmBinary: wasmArrayBuffer,
};
Expand All @@ -86,7 +86,16 @@ class TranslationHelper {
let total_words = message[0].sourceParagraph.replace(/(<([^>]+)>)/gi, "").trim()
.split(/\s+/).length;
const t0 = performance.now();

/*
* quality scores are not required for outbound translation. So we set the
* corresponding flag to false before calling translate api and restore
* its value after the api call is complete.
*/
let originalQualityEstimation = message[0].withQualityEstimation;
message[0].withQualityEstimation = false;
const translationResultBatch = this.translate(message);
message[0].withQualityEstimation = originalQualityEstimation;
const timeElapsed = [total_words, performance.now() - t0];

message[0].translatedParagraph = translationResultBatch[0];
Expand All @@ -99,10 +108,12 @@ class TranslationHelper {
}.bind(this));
}

// eslint-disable-next-line max-lines-per-function
consumeTranslationQueue() {

while (this.translationQueue.length() > 0) {
const translationMessagesBatch = this.translationQueue.dequeue();
// eslint-disable-next-line max-lines-per-function
Promise.resolve().then(function () {
if (translationMessagesBatch) {
try {
Expand All @@ -113,11 +124,40 @@ class TranslationHelper {
total_words += words.length;
});

/*
* engine doesn't return QE scores for the translation of Non-HTML source
* messages. Therefore, always encode and pass source messages as HTML to the
* engine and restore them afterwards to their original form.
*/
const non_html_qe_messages = new Map();
translationMessagesBatch.forEach((message, index) => {
if (message.withQualityEstimation && !message.isHTML) {
console.log(`Plain text received to translate with QE: "${message.sourceParagraph}"`);
non_html_qe_messages.set(index, message.sourceParagraph);
const div = document.createElement("div");
div.appendChild(document.createTextNode(message.sourceParagraph));
message.sourceParagraph = div.innerHTML;
message.isHTML = true;
}
});

const t0 = performance.now();

const translationResultBatch = this.translate(translationMessagesBatch);
const timeElapsed = [total_words, performance.now() - t0];

/*
* restore Non-HTML source messages that were encoded to HTML before being sent to
* engine to get the QE scores for their translations. The translations are not
* required to be decoded back to non-HTML form because QE scores are embedded in
* the translation via html attribute.
*/
non_html_qe_messages.forEach((value, key) => {
console.log("Restoring back source text and html flag");
translationMessagesBatch[key].sourceParagraph = value;
translationMessagesBatch[key].isHTML = false;
});

/*
* now that we have the paragraphs back, let's reconstruct them.
* we trust the engine will return the paragraphs always in the same order
Expand All @@ -139,7 +179,7 @@ class TranslationHelper {
throw e;
}
}
}.bind(this));
}.bind(this));
}
}

Expand All @@ -161,7 +201,8 @@ class TranslationHelper {
this.loadTranslationEngine(
message[0].sourceLanguage,
message[0].targetLanguage,
message[0].withOutboundTranslation
message[0].withOutboundTranslation,
message[0].withQualityEstimation
);

this.translationQueue.enqueue(message);
Expand Down Expand Up @@ -194,7 +235,7 @@ class TranslationHelper {
}

// eslint-disable-next-line max-lines-per-function
async loadLanguageModel(sourceLanguage, targetLanguage, withOutboundTranslation) {
async loadLanguageModel(sourceLanguage, targetLanguage, withOutboundTranslation, withQualityEstimation) {

/*
* let's load the models and communicate to the caller (translation)
Expand All @@ -204,11 +245,12 @@ class TranslationHelper {
let isReversedModelLoadingFailed = false;
try {
this.constructTranslationService();
await this.constructTranslationModel(sourceLanguage, targetLanguage);
await this.constructTranslationModel(sourceLanguage, targetLanguage, withQualityEstimation);

if (withOutboundTranslation) {
try {
await this.constructTranslationModel(targetLanguage, sourceLanguage);
// the Outbound Translation doesn't require supporting Quality Estimation
await this.constructTranslationModel(targetLanguage, sourceLanguage, /* withQualityEstimation=*/false);
postMessage([
"displayOutboundTranslation",
null
Expand Down Expand Up @@ -246,7 +288,7 @@ class TranslationHelper {
// instantiate the Translation Service
constructTranslationService() {
if (!this.translationService) {
let translationServiceConfig = {};
let translationServiceConfig = { cacheSize: 10 };
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: a cache size of 10 won't do you much good. The cache is non-probing; it is basically just `cache[hash(sentence) % cacheSize]`. The description of the parameter is a bit indirect about that. If you set it too low, you'll end up with too many different sentences hitting the same cache entry and constantly overwriting each other, so you get no cache benefit at all.

In my experience you'd get about 20% occupancy, so if you set it to 50 you'd be caching about 10 sentences. But from testing in TranslateLocally and my extension fork, I'd suggest starting with something around 1000 or higher, and then checking whether you can notice it in the memory usage.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the tip, Jelmer; please keep them coming. I think caching will be particularly helpful now that I moved the engine to the background script last week. I heard from the security folks that the move is fine, so I think the cache will be helpful.

console.log(`Creating Translation Service with config: ${translationServiceConfig}`);
this.translationService = new this.WasmEngineModule.BlockingService(translationServiceConfig);
console.log("Translation Service created successfully");
Expand All @@ -262,24 +304,25 @@ class TranslationHelper {
this.translationModels.clear();
}

async constructTranslationModel(from, to) {
async constructTranslationModel(from, to, withQualityEstimation) {
if (this._isPivotingRequired(from, to)) {
// pivoting requires 2 translation models to be constructed
const languagePairSrcToPivot = this._getLanguagePair(from, this.PIVOT_LANGUAGE);
const languagePairPivotToTarget = this._getLanguagePair(this.PIVOT_LANGUAGE, to);
await Promise.all([
this.constructTranslationModelHelper(languagePairSrcToPivot),
this.constructTranslationModelHelper(languagePairPivotToTarget)
this.constructTranslationModelHelper(languagePairSrcToPivot, withQualityEstimation),
this.constructTranslationModelHelper(languagePairPivotToTarget, withQualityEstimation)
]);
} else {
// non-pivoting case requires only 1 translation model
await this.constructTranslationModelHelper(this._getLanguagePair(from, to));
await this.constructTranslationModelHelper(this._getLanguagePair(from, to), withQualityEstimation);
}
}

// eslint-disable-next-line max-lines-per-function
async constructTranslationModelHelper(languagePair) {
async constructTranslationModelHelper(languagePair, withQualityEstimation) {
console.log(`Constructing translation model ${languagePair}`);
const modelConfigQualityEstimation = !withQualityEstimation;

/*
* for available configuration options,
Expand All @@ -294,7 +337,7 @@ class TranslationHelper {
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
skip-cost: ${modelConfigQualityEstimation}
cpu-threads: 0
quiet: true
quiet-translation: true
Expand Down Expand Up @@ -563,18 +606,8 @@ class TranslationHelper {
const vectorResponseOptions = new this.WasmEngineModule.VectorResponseOptions();
// eslint-disable-next-line no-unused-vars
messages.forEach(message => {

/*
* toDo: Activate this code once translate options can be passed per message
* const translateOptions = message.translateOptions;
* vectorResponseOptions.push_back({
* qualityScores: message.withQualityEstimation,
* alignment: true,
* html: message.isHtml
* });
*/
vectorResponseOptions.push_back({
qualityScores: false,
qualityScores: message.withQualityEstimation,
alignment: true,
html: message.isHTML,
});
Expand Down
6 changes: 3 additions & 3 deletions extension/model/engineRegistry.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
/* eslint-disable no-unused-vars */

let engineRegistryRootURL = "https://github.com/mozilla/bergamot-translator/releases/download/v0.4.1/";
let engineRegistryRootURL = "https://github.com/mozilla/bergamot-translator/releases/download/v0.4.2/";
const engineRegistryRootURLTest = "https://example.com/browser/browser/extensions/translations/test/browser/";

const engineRegistry = {
bergamotTranslatorWasm: {
fileName: "bergamot-translator-worker-with-wormhole.wasm",
fileSize: 6923132,
sha256: "26c24d3695eab02e91f20c68a8c1f7587d2a3e338a6f057f407c39a7e6c2c321"
fileSize: 6929232,
sha256: "cccc905f2ccbdbf13d38e640151f06f04ed5568f34fd1ee1a6c4bf73147e2c3f"
}
}
13 changes: 12 additions & 1 deletion extension/view/js/InPageTranslation.js
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,18 @@ class InPageTranslation {
let counterpart = dstChildNodes[child.dataset.xBergamotId];

if (!counterpart) {
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Could not find counterpart for`, child.dataset.xBergamotId, dstChildNodes, child);

/*
* if translated element child doesn't have data-x-bergamot-id attribute and
* has quality score specific attributes (that are set by translation engine
* when QE is on) then just add the translated element child to the live
* element node.
*/
if (!child.hasAttribute("data-x-bergamot-id") && (child.hasAttribute("x-bergamot-sentence-index") || child.hasAttribute("x-bergamot-word-index"))) {
dst.appendChild(child);
} else {
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Could not find counterpart for`, child.dataset.xBergamotId, dstChildNodes, child);
}
return;
}

Expand Down