Skip to content

Commit

Permalink
Merge pull request online-go#2810 from online-go/llm_translator
Browse files Browse the repository at this point in the history
Llm translator system
  • Loading branch information
anoek authored Sep 6, 2024
2 parents c2ffe6c + c48b699 commit f104b4b
Show file tree
Hide file tree
Showing 10 changed files with 308 additions and 50 deletions.
5 changes: 5 additions & 0 deletions .vscode/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@
"misclick",
"misclicks",
"modlog",
"msgctxt",
"msgid",
"msgstr",
"ngettext",
"nivo",
"nochallenge",
Expand All @@ -103,6 +105,7 @@
"nums",
"offtopic",
"omnisearch",
"openai",
"opengotha",
"pgettext",
"playouts",
Expand All @@ -124,6 +127,7 @@
"Sitewide",
"slowstrobe",
"sodos",
"strs",
"styl",
"sublicensable",
"subprops",
Expand Down Expand Up @@ -151,6 +155,7 @@
"unstarted",
"uservoice",
"usgc",
"vandalizations",
"wbaduk",
"Weiqi",
"xgettext",
Expand Down
7 changes: 6 additions & 1 deletion Makefile.production
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dev-put:
dev-del:
curl -X DELETE $(BETA_INDEX_HEADERS) http://localhost:1080/_index

beta-put: sync-translations audit-translations jsonify-po-files push-cdn upload-sentry-source-maps
beta-put: sync-translations create-unified-pot audit-translations jsonify-po-files push-cdn upload-sentry-source-maps
OGS_VERSION_HASH=$(OGS_VERSION_HASH) VENDOR_HASH=$(VENDOR_HASH) npm run minify-index --silent | curl -X PUT $(BETA_INDEX_HEADERS) -d @- https://beta.online-go.com/_index
make beta-notify-slack

Expand Down Expand Up @@ -82,6 +82,11 @@ sync-translations:
cd i18n/build/; ssh pootle@pootle "bash -c 'cd online-go.com/i18n/build; tar jcf - *'" | tar jxf -
cd i18n/locale/; ssh pootle@pootle "bash -c 'cd online-go.com/i18n/locale; tar jcf - *'" | tar jxf -

create-unified-pot:
# This is run both on the pootle server and here. The pootle server does most of the
# work; this invocation only extracts the strings that need to be translated by LLMs.
cd i18n; node create-unified-pot.js llm-translation-extraction

audit-translations:
cd i18n; node audit-translations.js

Expand Down
16 changes: 16 additions & 0 deletions i18n/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,20 @@ build/countries.json: cldr/common
mkdir -p build
./gen-country-lists.py

dev-run-translation-scripts:
cp ../dist/ogs.js build/ogs.strings.js
cp ../dist/ogs.js.map build/ogs.strings.js.map
sed -i 's/ogs.js.map/ogs.strings.js.map/' build/ogs.strings.js
python translation-code-replace-for-parsing.py
# The create-unified-pot script is typically run on the translation server
node create-unified-pot.js llm-translation-extraction
node audit-translations.js
node jsonify-po-files.js

audit:
node audit-translations.js

po:
node jsonify-po-files.js

-include Makefile.production
73 changes: 71 additions & 2 deletions i18n/audit-translations.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"use strict";

// cspell: words autotranslations conv "Türkçe" "hwat" "עִבְרִית" autotranslated autotranslation autotranslate

const deepl = require("deepl-node");
const fs = require("fs");
const PO = require("pofile");
const GoogleTranslate = require("@google-cloud/translate").v3.TranslationServiceClient;
const OpenAI = require("openai");

let keys = fs.existsSync("./keys.json") ? JSON.parse(fs.readFileSync("./keys.json")) : null;

Expand All @@ -15,6 +18,9 @@ const googleTranslate = keys
})
: null;

const openai = keys ? new OpenAI({ apiKey: keys.openai_api_key }) : null;
const OPENAI_MODEL = "gpt-4o";

let limit = 1;

/* We use emoji as placeholders for our auto-translations because the
Expand Down Expand Up @@ -164,7 +170,16 @@ async function main() {
JSON.stringify(translations_missing, null, 4),
);

if (deepl_translator && googleTranslate) {
// LLM translations
let llm_translations_needed = JSON.parse(fs.readFileSync("./build/llm-keys.json", "utf-8"));
for (let key in llm_translations_needed) {
for (let lang in languages) {
llm_translate(key, llm_translations_needed[key], lang, languages[lang]);
}
}

// Auto translate missing strs with deepl or google depending on language support
if (deepl_translator && googleTranslate && openai) {
if (Object.keys(vandalized_languages).length > 0) {
console.error(
`Critical error: ${
Expand Down Expand Up @@ -197,7 +212,9 @@ async function main() {
// code point stuff to reconstruct the string
// since indexing doesn't work as you'd hope
// here.
let k = String.fromCodePoint(emoji.codePointAt(2 * num_placeholders));
let k = String.fromCodePoint(
emoji.codePointAt(2 * num_placeholders),
);
//console.log(k);
placeholder_a_to_n[replacement] = k;
placeholder_n_to_a[k] = replacement;
Expand Down Expand Up @@ -383,6 +400,58 @@ function detect_profanity(lang, msg) {
return false;
}

// In-memory copy of ./llm-keys-cache.json, shaped as
// { [lang]: { [key]: translation } }. Loaded lazily on the first call to
// llm_translate() that is not for the "Debug" language.
let LLM_CACHE = undefined;

/**
 * Translate one user interface string from English into `language` using the
 * OpenAI chat completions API, memoizing results in ./llm-keys-cache.json so
 * each (lang, key) pair is only ever sent to the API once.
 *
 * @param {string} key - Identifier of the string; second-level cache key.
 * @param {{msgctxt?: string, msgid: string}} entry - `msgid` is the English
 *     source text; `msgctxt` is optional translator context for the prompt.
 * @param {string} lang - Identifier of the target language; first-level cache key.
 * @param {string} language - Name of the target language as inserted into the
 *     prompt. The special value "Debug" bypasses the cache and the API.
 * @returns {Promise<string>} The translation ("[msgid]" for "Debug").
 * @throws {Error} If a translation is not cached and no OpenAI client is
 *     configured, or if the API response carries no message content.
 */
async function llm_translate(key, entry, lang, language) {
    if (language === "Debug") {
        return "[" + entry.msgid + "]";
    }

    if (!LLM_CACHE) {
        if (fs.existsSync("./llm-keys-cache.json")) {
            LLM_CACHE = JSON.parse(fs.readFileSync("./llm-keys-cache.json", "utf-8"));
        } else {
            LLM_CACHE = {};
        }
    }

    if (!(lang in LLM_CACHE)) {
        LLM_CACHE[lang] = {};
    }

    if (key in LLM_CACHE[lang]) {
        return LLM_CACHE[lang][key];
    }

    // Checked only after the cache lookup so that already-cached translations
    // remain usable on machines that have no API key configured.
    if (!openai) {
        throw new Error(
            `Cannot translate "${entry.msgid}" to ${language}: no OpenAI client is configured`,
        );
    }

    // `+` binds tighter than `??`, so the fallback has to be parenthesized;
    // otherwise a missing msgctxt is sent to the model as the text "undefined".
    const context = entry.msgctxt ?? "";

    const completion = await openai.chat.completions.create({
        messages: [
            {
                role: "system",
                content:
                    "You are translating user interface strings from English to " +
                    language +
                    ". Only include the translation in your response.",
            },
            {
                role: "system",
                content: "The context provided for the string is: " + context,
            },
            {
                role: "system",
                content: "Translate the provided string from English to " + language,
            },
            { role: "user", content: entry.msgid },
        ],
        model: OPENAI_MODEL,
    });

    const translation = completion.choices?.[0]?.message?.content;
    if (typeof translation !== "string") {
        // Never persist a missing/null response as if it were a translation.
        throw new Error(
            `OpenAI returned no translation for "${entry.msgid}" (${language})`,
        );
    }

    LLM_CACHE[lang][key] = translation;
    // The whole cache is rewritten after every new translation so progress
    // survives an interrupted run.
    fs.writeFileSync("./llm-keys-cache.json", JSON.stringify(LLM_CACHE, null, 4));
    console.log("LLM translation", entry.msgid, " -> ", translation);
    return translation;
}

// Script entry point: run the audit, print "Done" on success, and report any
// failure on stderr.
(async () => {
    try {
        await main();
        console.log("Done");
    } catch (err) {
        console.error(err);
    }
})();
Loading

0 comments on commit f104b4b

Please sign in to comment.