diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 4c28bbf400..3f2850a793 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -105,6 +105,7 @@ "nums", "offtopic", "omnisearch", + "openai", "opengotha", "pgettext", "playouts", diff --git a/Makefile.production b/Makefile.production index 8c327b76f0..82273be2c7 100644 --- a/Makefile.production +++ b/Makefile.production @@ -44,7 +44,7 @@ dev-put: dev-del: curl -X DELETE $(BETA_INDEX_HEADERS) http://localhost:1080/_index -beta-put: sync-translations audit-translations jsonify-po-files push-cdn upload-sentry-source-maps +beta-put: sync-translations create-unified-pot audit-translations jsonify-po-files push-cdn upload-sentry-source-maps OGS_VERSION_HASH=$(OGS_VERSION_HASH) VENDOR_HASH=$(VENDOR_HASH) npm run minify-index --silent | curl -X PUT $(BETA_INDEX_HEADERS) -d @- https://beta.online-go.com/_index make beta-notify-slack @@ -82,6 +82,11 @@ sync-translations: cd i18n/build/; ssh pootle@pootle "bash -c 'cd online-go.com/i18n/build; tar jcf - *'" | tar jxf - cd i18n/locale/; ssh pootle@pootle "bash -c 'cd online-go.com/i18n/locale; tar jcf - *'" | tar jxf - +create-unified-pot: + # This is run on the pootle server and here, the pootle server does most of the work, + # this invocation extracts strings that need to be translated by LLMs + cd i18n; node create-unified-pot.js llm-translation-extraction + audit-translations: cd i18n; node audit-translations.js diff --git a/i18n/Makefile b/i18n/Makefile index d3d7afe610..a9f36548bd 100644 --- a/i18n/Makefile +++ b/i18n/Makefile @@ -14,4 +14,20 @@ build/countries.json: cldr/common mkdir -p build ./gen-country-lists.py +dev-run-translation-scripts: + cp ../dist/ogs.js build/ogs.strings.js + cp ../dist/ogs.js.map build/ogs.strings.js.map + sed -i 's/ogs.js.map/ogs.strings.js.map/' build/ogs.strings.js + python translation-code-replace-for-parsing.py + # Create a unified pot file script is run on the translation server typically + node create-unified-pot.js 
// In-memory copy of ./llm-keys-cache.json, shaped { [lang]: { [key]: translation } }.
// It stays undefined until the first non-Debug call to llm_translate() loads it.
let LLM_CACHE = undefined;

/**
 * Translate a single UI string with the OpenAI chat completions API.
 *
 * Results are memoized in LLM_CACHE and persisted to ./llm-keys-cache.json
 * after every new translation, so a given (lang, key) pair is only ever sent
 * to the API once.
 *
 * @param {string} key - Catalog key identifying the string; used as the cache key.
 * @param {{msgctxt?: string, msgid: string}} entry - The string to translate
 *     (msgid) and the optional context handed to the model (msgctxt).
 * @param {string} lang - Language code; first-level key of the cache.
 * @param {string} language - Language name used in the prompt. The special
 *     value "Debug" short-circuits to "[msgid]" without touching the cache
 *     or the API.
 * @returns {Promise<string>} The translated string.
 * @throws {Error} If a translation is needed but no OpenAI client is
 *     configured, or if the API response contains no usable text.
 */
async function llm_translate(key, entry, lang, language) {
    if (language === "Debug") {
        return `[${entry.msgid}]`;
    }

    const cache_path = "./llm-keys-cache.json";
    // Own-property checks only: a plain `in` also matches inherited names such
    // as "constructor" or "toString", which are perfectly valid msgids.
    const has_own = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

    if (!LLM_CACHE) {
        LLM_CACHE = fs.existsSync(cache_path)
            ? JSON.parse(fs.readFileSync(cache_path, "utf-8"))
            : {};
    }

    if (!has_own(LLM_CACHE, lang)) {
        LLM_CACHE[lang] = {};
    }

    if (has_own(LLM_CACHE[lang], key)) {
        return LLM_CACHE[lang][key];
    }

    // `openai` is null when keys.json is absent; fail with an actionable message
    // instead of "Cannot read properties of null".
    if (!openai) {
        throw new Error(
            `llm_translate: no OpenAI client configured, cannot translate ${JSON.stringify(
                key,
            )} to ${language}`,
        );
    }

    const completion = await openai.chat.completions.create({
        messages: [
            {
                role: "system",
                content:
                    `You are translating user interface strings from English to ${language}` +
                    ". Only include the translation in your response.",
            },
            {
                role: "system",
                // Parenthesised on purpose: `+` binds tighter than `??`, so without the
                // parentheses a missing context is sent as the literal text "undefined".
                content: "The context provided for the string is: " + (entry.msgctxt ?? ""),
            },
            {
                role: "system",
                content: `Translate the provided string from English to ${language}`,
            },
            { role: "user", content: entry.msgid },
        ],
        model: OPENAI_MODEL,
    });

    const translation = completion?.choices?.[0]?.message?.content;
    // Never persist an empty or null answer: it would be cached forever and
    // shipped to users as the translation.
    if (typeof translation !== "string" || translation.trim() === "") {
        throw new Error(
            `llm_translate: empty response translating ${JSON.stringify(key)} to ${language}`,
        );
    }

    LLM_CACHE[lang][key] = translation;
    fs.writeFileSync(cache_path, JSON.stringify(LLM_CACHE, null, 4));
    console.log("LLM translation", entry.msgid, " -> ", translation);
    return translation;
}
/**
 * Keyword handler for LLM-translated strings.
 *
 * Starts from the record produced by prep(match), flags it with llm = true,
 * and fills in the message fields from the call's arguments:
 * argument 0 is the context (msgctxt), argument 1 the message (msgid), and an
 * optional argument 2 the plural form (msgid_plural).
 *
 * @param {object} match - A parser match; its `arguments` entries are read
 *     through their `.value` property.
 * @returns {object} The prepared record with llm, msgctxt, msgid and, when a
 *     third argument is present, msgid_plural set.
 */
function llm_ctxt(match) {
    const record = prep(match);
    const call_args = match.arguments;

    Object.assign(record, {
        llm: true,
        msgctxt: call_args[0].value,
        msgid: call_args[1].value,
    });

    // The plural form is optional and only recorded when a third argument exists.
    if (call_args.length > 2) {
        record.msgid_plural = call_args[2].value;
    }

    return record;
}
item.msgctxt + "\x04" : ""; key += item.msgid; @@ -109,6 +141,7 @@ function main() { for (let m of parser.getMatches(source)) { if (m.msgid == "") { + console.log("Skipping blank translation"); console.log(m); continue; } @@ -124,6 +157,14 @@ function main() { } ui_only_keys[key] = 1; + if (m.llm) { + llm_keys[key] = { + msgctxt: m.msgctxt, + msgid: m.msgid, + msgid_plural: m.msgid_plural, + }; + continue; + } if (!(key in po_items)) { po_items[key] = new PO.Item(); @@ -145,9 +186,14 @@ function main() { } } - fs.writeFile("build/llm-keys.json", JSON.stringify(llm_keys), () => - console.log("build/llm-keys-ui-keys.json written"), - ); + fs.writeFileSync("build/llm-keys.json", JSON.stringify(llm_keys, undefined, 4)); + console.log("build/llm-keys-ui-keys.json written"); + + if (MODE === "llm-translation-extraction") { + console.log("llm-translation-extraction mode complete, exiting"); + process.exit(0); + } + fs.writeFile("build/ogs-ui-keys.json", JSON.stringify(ui_only_keys), () => console.log("build/ogs-ui-keys.json written"), ); diff --git a/i18n/jsonify-po-files.js b/i18n/jsonify-po-files.js index ed70caa5e1..e61929373c 100644 --- a/i18n/jsonify-po-files.js +++ b/i18n/jsonify-po-files.js @@ -15,6 +15,12 @@ async function main() { const autotranslations = fs.existsSync("./autotranslations.json") ? JSON.parse(await fs.promises.readFile("./autotranslations.json", "utf-8")) : {}; + const llm_cache = fs.existsSync("./llm-keys-cache.json") + ? JSON.parse(await fs.promises.readFile("./llm-keys-cache.json", "utf-8")) + : {}; + const llm_needed = fs.existsSync("./build/llm-keys.json") + ? 
/**
 * Like pgettext(), but these strings will be automatically translated instead
 * of being passed along to our volunteers.
 *
 * The context is fed into the LLM system as general instructions and context.
 * The msgid is the message that needs to be translated.
 *
 * @param context Context for the string; may be the empty string.
 * @param msgid The English message to look up.
 * @returns The first catalog entry for the string when a translation is
 *     available, otherwise debug_wrap(msgid).
 */
export function llm_pgettext(context: string, msgid: string) {
    // Catalog keys follow the i18n extraction scripts: "<msgctxt>\x04<msgid>"
    // when a context is present and the bare msgid when it is empty. For an
    // empty context the separator-prefixed form is also tried, in case an entry
    // was stored that way.
    const candidates = context ? [`${context}\x04${msgid}`] : [msgid, `\x04${msgid}`];

    for (const key of candidates) {
        // Own-property check: a plain `in` would also match inherited names
        // such as "constructor", which has no translation array behind it.
        if (Object.prototype.hasOwnProperty.call(catalog, key)) {
            return catalog[key][0];
        }
    }

    return debug_wrap(msgid);
}
*/ diff --git a/src/views/HelpFlows/GameLogHelp.tsx b/src/views/HelpFlows/GameLogHelp.tsx index 565e4d3725..b48ac5763f 100644 --- a/src/views/HelpFlows/GameLogHelp.tsx +++ b/src/views/HelpFlows/GameLogHelp.tsx @@ -19,7 +19,7 @@ import React from "react"; import { HelpFlow, HelpItem } from "react-dynamic-help"; -import { _, pgettext } from "translate"; +import { llm_pgettext } from "translate"; /** * A help flow intended for moderators and CMs wondering about mysterious auto score entries @@ -31,11 +31,12 @@ export function GameLogHelp(): JSX.Element { id="game-log-help" showInitially={true} debug={true} - description={pgettext("Name of a dynamic help flow", "Game Log Help")} + description={llm_pgettext("Name of a dynamic help flow", "Game Log Help")} >
- {_( + {llm_pgettext( + "", "These come from the user's browser during autoscoring. Two of these from each user, at the beginning of the scoring phase, and if the users presses 'auto-score'", )}
diff --git a/yarn.lock b/yarn.lock index f003b2aa42..e5d69c4589 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2133,6 +2133,14 @@ dependencies: moment ">=2.14.0" +"@types/node-fetch@^2.6.4": + version "2.6.11" + resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24" + integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g== + dependencies: + "@types/node" "*" + form-data "^4.0.0" + "@types/node@*", "@types/node@>=12", "@types/node@>=12.0", "@types/node@>=12.12.47", "@types/node@>=13.7.0": version "20.11.30" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.11.30.tgz#9c33467fc23167a347e73834f788f4b9f399d66f" @@ -2140,6 +2148,13 @@ dependencies: undici-types "~5.26.4" +"@types/node@^18.11.18": + version "18.19.50" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.50.tgz#8652b34ee7c0e7e2004b3f08192281808d41bf5a" + integrity sha512-xonK+NRrMBRtkL1hVCc3G+uXtjh1Al4opBLjqVmipe5ZAaBYWW6cNAiBVZ1BvmkBhep698rP3UM3aRAdSALuhg== + dependencies: + undici-types "~5.26.4" + "@types/normalize-package-data@^2.4.0": version "2.4.4" resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz#56e2cc26c397c038fab0e3a917a12d5c5909e901" @@ -2160,6 +2175,11 @@ resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.11.tgz#2596fb352ee96a1379c657734d4b913a613ad563" integrity sha512-ga8y9v9uyeiLdpKddhxYQkxNDrfvuPrlFb0N1qnZZByvcElJaXthF1UhvCh9TLWJBEHeNtdnbysW7Y6Uq8CVng== +"@types/qs@^6.9.15": + version "6.9.15" + resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.15.tgz#adde8a060ec9c305a82de1babc1056e73bd64dce" + integrity sha512-uXHQKES6DQKKCLh441Xv/dwxOq1TVS3JPUMlEqoEglvlhR6Mxnlew/Xq/LRVHpLyk7iK3zODe1qYHIMltO7XGg== + "@types/query-string@^5.0.1": version "5.1.0" resolved "https://registry.yarnpkg.com/@types/query-string/-/query-string-5.1.0.tgz#7f40cdea49ddafa0ea4f3db35fb6c24d3bfd4dcc" 
@@ -2629,6 +2649,13 @@ agent-base@6: dependencies: debug "4" +agentkeepalive@^4.2.1: + version "4.5.0" + resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.5.0.tgz#2673ad1389b3c418c5a20c5d7364f93ca04be923" + integrity sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew== + dependencies: + humanize-ms "^1.2.1" + ajv-keywords@^3.5.2: version "3.5.2" resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d" @@ -5664,6 +5691,11 @@ fork-ts-checker-webpack-plugin@^9.0.0: semver "^7.3.5" tapable "^2.2.1" +form-data-encoder@1.7.2: + version "1.7.2" + resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040" + integrity sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A== + form-data@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" @@ -5682,6 +5714,14 @@ form-data@^4.0.0: combined-stream "^1.0.8" mime-types "^2.1.12" +formdata-node@^4.3.2: + version "4.4.1" + resolved "https://registry.yarnpkg.com/formdata-node/-/formdata-node-4.4.1.tgz#23f6a5cb9cb55315912cbec4ff7b0f59bbd191e2" + integrity sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ== + dependencies: + node-domexception "1.0.0" + web-streams-polyfill "4.0.0-beta.3" + forwarded@0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.2.0.tgz#2269936428aad4c15c7ebe9779a84bf0b2a81811" @@ -6363,6 +6403,13 @@ human-signals@^5.0.0: resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-5.0.0.tgz#42665a284f9ae0dade3ba41ebc37eb4b852f3a28" integrity sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ== +humanize-ms@^1.2.1: + version "1.2.1" + resolved 
"https://registry.yarnpkg.com/humanize-ms/-/humanize-ms-1.2.1.tgz#c46e3159a293f6b896da29316d8b6fe8bb79bbed" + integrity sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ== + dependencies: + ms "^2.0.0" + husky@^8.0.1: version "8.0.3" resolved "https://registry.yarnpkg.com/husky/-/husky-8.0.3.tgz#4936d7212e46d1dea28fef29bb3a108872cd9184" @@ -8074,7 +8121,7 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -ms@2.1.3, ms@^2.1.1: +ms@2.1.3, ms@^2.0.0, ms@^2.1.1: version "2.1.3" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== @@ -8121,7 +8168,12 @@ node-abort-controller@^3.0.1: resolved "https://registry.yarnpkg.com/node-abort-controller/-/node-abort-controller-3.1.1.tgz#a94377e964a9a37ac3976d848cb5c765833b8548" integrity sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ== -node-fetch@^2.6.1, node-fetch@^2.6.9: +node-domexception@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + +node-fetch@^2.6.1, node-fetch@^2.6.7, node-fetch@^2.6.9: version "2.7.0" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== @@ -8319,6 +8371,21 @@ onetime@^6.0.0: dependencies: mimic-fn "^4.0.0" +openai@^4.57.3: + version "4.57.3" + resolved 
"https://registry.yarnpkg.com/openai/-/openai-4.57.3.tgz#e8dc0973c112a348a7cb29a92d59111831db9787" + integrity sha512-mTz5/SmulkkeSpqbSr6WNLRU6krkyhnbfRUC8XfaXbj1T6xUorKEELjZvbRSzI714JLOk1MeFkqYS9H4WHhqDQ== + dependencies: + "@types/node" "^18.11.18" + "@types/node-fetch" "^2.6.4" + "@types/qs" "^6.9.15" + abort-controller "^3.0.0" + agentkeepalive "^4.2.1" + form-data-encoder "1.7.2" + formdata-node "^4.3.2" + node-fetch "^2.6.7" + qs "^6.10.3" + opener@^1.5.2: version "1.5.2" resolved "https://registry.yarnpkg.com/opener/-/opener-1.5.2.tgz#5d37e1f35077b9dcac4301372271afdeb2a13598" @@ -9011,6 +9078,13 @@ qs@6.11.0: dependencies: side-channel "^1.0.4" +qs@^6.10.3: + version "6.13.0" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.13.0.tgz#6ca3bd58439f7e245655798997787b0d88a51906" + integrity sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg== + dependencies: + side-channel "^1.0.6" + qs@^6.4.0: version "6.12.0" resolved "https://registry.yarnpkg.com/qs/-/qs-6.12.0.tgz#edd40c3b823995946a8a0b1f208669c7a200db77" @@ -10850,6 +10924,11 @@ watchpack@^2.4.1: glob-to-regexp "^0.4.1" graceful-fs "^4.1.2" +web-streams-polyfill@4.0.0-beta.3: + version "4.0.0-beta.3" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz#2898486b74f5156095e473efe989dcf185047a38" + integrity sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug== + webidl-conversions@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871"