Skip to content
This repository has been archived by the owner on Feb 13, 2021. It is now read-only.

Commit

Permalink
Added support for Bergamot translator
Browse files Browse the repository at this point in the history
Signed-off-by: kdavis-mozilla <[email protected]>
  • Loading branch information
kdavis-mozilla committed Sep 10, 2019
1 parent 588814f commit d8b268f
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 5 deletions.
4 changes: 2 additions & 2 deletions browser/app/profile/firefox.js
Original file line number Diff line number Diff line change
Expand Up @@ -1531,8 +1531,8 @@ pref("browser.translation.detectLanguage", false);
pref("browser.translation.neverForLanguages", "");
// Show the translation UI bits, like the info bar, notification icon and preferences.
pref("browser.translation.ui.show", false);
// Allows to define the translation engine. Google is default, Bing or Yandex are other options.
pref("browser.translation.engine", "Google");
// Selects the translation engine. Bergamot is the default; Google, Bing, and Yandex are the other options.
pref("browser.translation.engine", "Bergamot");

// Telemetry settings.
// Determines if Telemetry pings can be archived locally.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ var whitelist = [
{ file: "resource://pdf.js/web/debugger.js" },

// resource://app/modules/translation/TranslationContentHandler.jsm
{ file: "resource://app/modules/translation/BergamotTranslator.jsm" },
{ file: "resource://app/modules/translation/BingTranslator.jsm" },
{ file: "resource://app/modules/translation/GoogleTranslator.jsm" },
{ file: "resource://app/modules/translation/YandexTranslator.jsm" },
Expand Down
319 changes: 319 additions & 0 deletions browser/components/translation/BergamotTranslator.jsm
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

var EXPORTED_SYMBOLS = ["BergamotTranslator"];

const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { PromiseUtils } = ChromeUtils.import(
"resource://gre/modules/PromiseUtils.jsm"
);
const { httpRequest } = ChromeUtils.import("resource://gre/modules/Http.jsm");
const { XPCOMUtils } = ChromeUtils.import(
"resource://gre/modules/XPCOMUtils.jsm"
);
XPCOMUtils.defineLazyGlobalGetters(this, ["DOMParser"]);

// The maximum amount of net data (measured as text length) allowed per
// request on Bergamot's API.
// XXX This value was copied from the Bing translator; the real Bergamot limit
// has not been established.
const MAX_REQUEST_DATA = 5000; // XXX This is the Bing value

// The maximum number of chunks (translation roots) allowed to be translated
// in a single request.
const MAX_REQUEST_CHUNKS = 1; // Undocumented, but the de facto upper limit.

// Self-imposed limit of 1920 requests. This means that a page that would need
// to be broken in more than 1920 requests won't be fully translated.
// The maximum amount of data that we will translate for a single page
// is MAX_REQUESTS * MAX_REQUEST_DATA.
const MAX_REQUESTS = 1920;

// Endpoint every translation request is POSTed to.
// NOTE(review): this is a plain-HTTP URL, so page text sent for translation
// presumably travels unencrypted -- confirm whether an HTTPS endpoint exists.
const URL = "http://demo.statmt.org/api/elg/v1";

/**
 * Translates a webpage using Bergamot's Translation API.
 *
 * Constructing the object only records its inputs; call `translate()` to
 * start the work and obtain the promise that tracks it.
 *
 * @param translationDocument  The TranslationDocument object that represents
 *                             the webpage to be translated
 * @param sourceLanguage       The source language of the document
 * @param targetLanguage       The target language for the translation
 */
var BergamotTranslator = function(
  translationDocument,
  sourceLanguage,
  targetLanguage
) {
  this.translationDocument = translationDocument;
  this.sourceLanguage = sourceLanguage;
  this.targetLanguage = targetLanguage;
  // Number of outstanding units of work (in-flight requests, plus one guard
  // slot held by `translate()` while it is still dispatching).
  this._pendingRequests = 0;
  // Becomes true as soon as one chunk has been translated and applied.
  this._partialSuccess = false;
  this._translatedCharacterCount = 0;
};

// Refer to the binding directly instead of going through top-level `this`,
// so the prototype assignment does not depend on what `this` is bound to.
BergamotTranslator.prototype = {
  /**
   * Performs the translation, splitting the document into several chunks
   * respecting the data limits of the API.
   *
   * @returns {Promise} A promise that resolves with `{ characterCount }` once
   *                    every dispatched request has settled and at least one
   *                    chunk was translated, or rejects with "failure" when
   *                    none was.
   */
  async translate() {
    let currentIndex = 0;
    this._onFinishedDeferred = PromiseUtils.defer();

    // Hold one guard slot for the dispatch loop itself. The loop yields to
    // the event loop on every iteration, so an early response may arrive
    // before later requests have been fired; without the guard the counter
    // would hit zero and settle the promise while chunks are still missing.
    this._pendingRequests++;

    // Let's split the document into various requests to be sent to
    // Bergamot's Translation API.
    for (let requestCount = 0; requestCount < MAX_REQUESTS; requestCount++) {
      // Generating the text for each request can be expensive, so
      // let's take the opportunity of the chunkification process to
      // allow for the event loop to attend other pending events
      // before we continue.
      await new Promise(resolve => Services.tm.dispatchToMainThread(resolve));

      // Determine the data for the next request.
      let request = this._generateNextTranslationRequest(currentIndex);

      // A chunk without any text has nothing to translate and could never be
      // parsed successfully, so don't spend a network round trip on it.
      if (request.data.length) {
        // Create a real request to the server, and count it as pending.
        let bergamotRequest = new BergamotRequest(
          request.data,
          this.sourceLanguage,
          this.targetLanguage
        );
        this._pendingRequests++;
        bergamotRequest
          .fireRequest()
          .then(this._chunkCompleted.bind(this), this._chunkFailed.bind(this));
      }

      currentIndex = request.lastIndex;
      if (request.finished) {
        break;
      }
    }

    // Dispatching is over: release the guard slot. This settles the promise
    // right away when every request has already come back (or none was sent).
    this._checkIfFinished();

    return this._onFinishedDeferred.promise;
  },

  /**
   * Function called when a request sent to the server completed successfully.
   * Applies the result to the document and then checks whether the promise
   * returned by the public `translate()` method can be settled.
   *
   * @param bergamotRequest The BergamotRequest sent to the server.
   */
  _chunkCompleted(bergamotRequest) {
    if (this._parseChunkResult(bergamotRequest)) {
      this._partialSuccess = true;
      // Count the number of characters successfully translated.
      this._translatedCharacterCount += bergamotRequest.characterCount;
    }

    this._checkIfFinished();
  },

  /**
   * Function called when a request sent to the server has failed. The failed
   * chunk is simply left untranslated; this only accounts for the request
   * being over.
   *
   * @param aError [optional] The value the request promise rejected with
   *                          (currently unused).
   */
  _chunkFailed(aError) {
    this._checkIfFinished();
  },

  /**
   * Marks one pending unit of work as done and, when none is left, settles
   * the promise returned by the public `translate()` method.
   */
  _checkIfFinished() {
    // If at least one chunk was successful, the promise will be resolved
    // positively which will display the "Success" state for the infobar.
    // Otherwise, the "Error" state will appear.
    if (--this._pendingRequests === 0) {
      if (this._partialSuccess) {
        this._onFinishedDeferred.resolve({
          characterCount: this._translatedCharacterCount,
        });
      } else {
        this._onFinishedDeferred.reject("failure");
      }
    }
  },

  /**
   * Parses the JSON body of a finished request and hands the translated text
   * (read from `response.texts[0].text`) to the first root of the chunk.
   *
   * Every step runs inside a single `try`, so an unexpected body (invalid
   * JSON, JSON `null`, missing fields) is reported as a failed chunk rather
   * than thrown; a throw here would skip `_checkIfFinished()` and leave the
   * `translate()` promise pending forever.
   *
   * @param bergamotRequest The request sent to the server.
   * @returns boolean True if parsing of this chunk was successful.
   */
  _parseChunkResult(bergamotRequest) {
    try {
      let results = JSON.parse(bergamotRequest.networkRequest.response);
      if (!results || results.failure) {
        return false;
      }

      let result = results.response.texts[0].text;
      let root = bergamotRequest.translationData[0][0];
      if (root.isSimpleRoot && result.includes("&")) {
        // If the result contains HTML entities, we need to convert them as
        // simple roots expect a plain text result.
        let doc = new DOMParser().parseFromString(result, "text/html");
        result = doc.body.firstChild.nodeValue;
      }
      root.parseResult(result);
      return true;
    } catch (e) {
      // Deliberately best-effort: a chunk that can't be applied is only
      // counted as not translated.
      return false;
    }
  },

  /**
   * This function will determine what is the data to be used for
   * the Nth request we are generating, based on the input params.
   *
   * @param startIndex What is the index, in the roots list, that the
   *                   chunk should start.
   * @returns {Object} `data` is the list of `[root, text]` pairs for the
   *                   request, `finished` tells whether the roots list was
   *                   exhausted, and `lastIndex` is the index to pass as
   *                   `startIndex` on the next call (0 once finished).
   */
  _generateNextTranslationRequest(startIndex) {
    let currentDataSize = 0;
    let currentChunks = 0;
    let output = [];
    let rootsList = this.translationDocument.roots;

    for (let i = startIndex; i < rootsList.length; i++) {
      let root = rootsList[i];
      let text = this.translationDocument.generateTextForItem(root);
      if (!text) {
        continue;
      }

      if (text.length > MAX_REQUEST_DATA) {
        // This root can never fit in a request, not even on its own. Skip it:
        // stopping here would hand the same index back to the caller on every
        // call, producing nothing but empty requests until MAX_REQUESTS is
        // exhausted and blocking every root that follows.
        continue;
      }

      let newCurSize = currentDataSize + text.length;
      let newChunks = currentChunks + 1;

      if (newCurSize > MAX_REQUEST_DATA || newChunks > MAX_REQUEST_CHUNKS) {
        // If we've reached the API limits, let's stop accumulating data
        // for this request and return. We return information useful for
        // the caller to pass back on the next call, so that the function
        // can keep working from where it stopped.
        return {
          data: output,
          finished: false,
          lastIndex: i,
        };
      }

      currentDataSize = newCurSize;
      currentChunks = newChunks;
      output.push([root, text]);
    }

    return {
      data: output,
      finished: true,
      lastIndex: 0,
    };
  },
};

/**
 * A single request (carrying one chunk) to Bergamot's service.
 *
 * @param translationData The `[root, text]` pairs making up this chunk, as
 *                        produced by `_generateNextTranslationRequest`.
 * @param sourceLanguage  The source language of the document.
 * @param targetLanguage  The target language for the translation.
 */
function BergamotRequest(translationData, sourceLanguage, targetLanguage) {
  Object.assign(this, {
    translationData,
    sourceLanguage,
    targetLanguage,
    // Running total of the characters put into the request by fireRequest().
    characterCount: 0,
  });
}

BergamotRequest.prototype = {
  /**
   * Builds the JSON payload and POSTs it to the service.
   *
   * @returns {Promise} Resolves with this request object once the response
   *                    has loaded; rejects with the XHR object on error.
   */
  fireRequest() {
    const headers = [["Content-Type", "application/json"]];

    const payload = {
      metadata: { id: 0 },
      request: {
        type: "text",
        mimeType: "text/plain",
        characterEncoding: "UTF-8",
      },
    };

    for (const entry of this.translationData) {
      // Hack: the server only handles pure text, and the client cannot know
      // where inline markup belongs in the translated sentence, e.g.
      //   <b>Do not</b> touch.  ->  <b>Ne</b> touche <b>pas</b>.
      // So the tags are dropped and the formatting is left to chance.
      const plainText = entry[1].replace(/<[^>]*>?/gm, "");
      // Each entry overwrites the previous content, while the character
      // count keeps accumulating over all entries.
      payload.request.content = plainText;
      this.characterCount += plainText.length;
    }

    const postData = JSON.stringify(payload);

    return new Promise((resolve, reject) => {
      // Keep a handle on the network request so the response can be read
      // from it later.
      this.networkRequest = httpRequest(URL, {
        onLoad: () => resolve(this),
        onError: (e, responseText, xhr) => reject(xhr),
        postData,
        headers,
      });
    });
  },
};
5 changes: 3 additions & 2 deletions browser/components/translation/Translation.jsm
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,16 @@ var Translation = {
*/
supportedEngines: {
Google: "",
Bergamot: "https://browser.mt/",
Bing: "http://aka.ms/MicrosoftTranslatorAttribution",
Yandex: "http://translate.yandex.com/",
},

/**
* Fallback engine (currently Google) if the preferences seem confusing.
* Fallback engine (currently Bergamot) if the preferences seem confusing.
*/
get defaultEngine() {
return Object.keys(this.supportedEngines)[0];
return Object.keys(this.supportedEngines)[1];
},

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class MozTranslationNotification extends MozElements.Notification {
let engineIndex = Object.keys(Translation.supportedEngines).indexOf(
Translation.translationEngine
);
// We currently only have attribution for the Bing and Yandex engines.
// We currently only have attribution for the Bergamot, Bing, and Yandex engines.
if (engineIndex >= 0) {
--engineIndex;
}
Expand Down
1 change: 1 addition & 0 deletions browser/components/translation/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ with Files("**"):
BUG_COMPONENT = ("Firefox", "Translation")

EXTRA_JS_MODULES.translation = [
'BergamotTranslator.jsm',
'BingTranslator.jsm',
'cld2/cld-worker.js',
'cld2/cld-worker.js.mem',
Expand Down

0 comments on commit d8b268f

Please sign in to comment.