Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the README.md to refer to build-wasm.sh and point to the developer docs #463

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
- checkout

- run:
name: Build WASM
name: Build Wasm
command: |
bash build-wasm.sh

Expand Down Expand Up @@ -77,5 +77,3 @@ workflows:
ignore: /.*/
requires:
- build

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My editor enforces the single newline at the end of the files on save. I hope this isn't an issue.


2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ jobs:
run: |
ccache -s # Print current cache stats

- name: Import GEMM library from a separate wasm module
- name: Import GEMM library from a separate Wasm module
working-directory: build-wasm
run: bash ../wasm/patch-artifacts-import-gemm-module.sh

Expand Down
21 changes: 10 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ endif()

if(NOT COMPILE_WASM)
# Setting BUILD_ARCH to native invokes CPU intrinsic detection logic below.
# Prevent invoking that logic for WASM builds.
# Prevent invoking that logic for Wasm builds.
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")

# Unfortunately MSVC supports a limited subset of BUILD_ARCH flags. Instead try to guess
Expand Down Expand Up @@ -68,10 +68,10 @@ endif(MSVC)
include(CMakeDependentOption)

# Project specific cmake options
option(COMPILE_WASM "Compile for WASM" OFF)
cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
option(COMPILE_WASM "Compile for Wasm" OFF)
cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use Wasm compatible sources" OFF "NOT COMPILE_WASM" ON)

# WASM disables a million libraries, which also includes the unit test-library.
# Wasm disables a million libraries, which also includes the unit test-library.
cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON)
option(COMPILE_TESTS "Compile bergamot-tests" OFF)
cmake_dependent_option(ENABLE_CACHE_STATS "Enable stats on cache" ON "COMPILE_TESTS" OFF)
Expand All @@ -85,7 +85,7 @@ SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests")
if (USE_WASM_COMPATIBLE_SOURCE)
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
# # Setting the ssplit-cpp submodule specific cmake options for wasm
# # Setting the ssplit-cpp submodule specific cmake options for Wasm
SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
endif()

Expand Down Expand Up @@ -115,9 +115,9 @@ if(COMPILE_WASM)
# See https://github.com/emscripten-core/emscripten/blob/main/src/settings.js
list(APPEND WASM_COMPILE_FLAGS
-O3
# Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
# Preserve whitespaces in JS even for release builds; this doesn't increase Wasm binary size
$<$<CONFIG:Release>:-g1>
# Relevant Debug info only for release with debug builds as this increases wasm binary size
# Relevant Debug info only for release with debug builds as this increases Wasm binary size
$<$<CONFIG:RelWithDebInfo>:-g2>
-fPIC
-mssse3
Expand All @@ -128,9 +128,9 @@ if(COMPILE_WASM)
)
list(APPEND WASM_LINK_FLAGS
-O3
# Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
# Preserve whitespaces in JS even for release builds; this doesn't increase Wasm binary size
$<$<CONFIG:Release>:-g1>
# Relevant Debug info only for release with debug builds as this increases wasm binary size
# Relevant Debug info only for release with debug builds as this increases Wasm binary size
$<$<CONFIG:RelWithDebInfo>:-g2>
-lembind
# Save some code, and some speed
Expand All @@ -154,7 +154,7 @@ if(COMPILE_WASM)
# Export all of the intgemm functions in case we need to fall back to using the embedded intgemm
-sEXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]
# Necessary for mozintgemm linking. This prepares the `wasmMemory` variable ahead of time as
# opposed to delegating that task to the wasm binary itself. This way we can link MozIntGEMM
# opposed to delegating that task to the Wasm binary itself. This way we can link MozIntGEMM
# module to the same memory as the main bergamot-translator module.
-sIMPORTED_MEMORY=1
# Dynamic execution is either frowned upon or blocked inside browser extensions
Expand All @@ -180,4 +180,3 @@ option(COMPILE_PYTHON "Compile python bindings. Intended to be activated with se
if(COMPILE_PYTHON)
add_subdirectory(bindings/python)
endif(COMPILE_PYTHON)

54 changes: 11 additions & 43 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

[![CircleCI badge](https://img.shields.io/circleci/project/github/browsermt/bergamot-translator/main.svg?label=CircleCI)](https://circleci.com/gh/browsermt/bergamot-translator/)

Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.github.io/) framework based) neural machine translation functionality in accordance with the [Bergamot](https://browser.mt/) project that focuses on improving client-side machine translation in a web browser.
Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.github.io/) framework based) neural machine translation functionality in accordance with the [Bergamot](https://browser.mt/) project that focuses on improving client-side machine translation in a web browser. Read more about this project in the [developer documentation](https://browser.mt/docs/main/index.html).

## Build Instructions

### Build Natively

Create a folder where you want to build all the artifacts (`build-native` in this case) and compile

```bash
Expand All @@ -16,67 +17,34 @@ cmake ../
make -j2
```

### Build WASM
#### Prerequisite

Building on wasm requires Emscripten toolchain. It can be downloaded and installed using following instructions:

* Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git`
* Enter the cloned directory: `cd emsdk`
* Install the sdk: `./emsdk install 3.1.8`
* Activate the sdk: `./emsdk activate 3.1.8`
* Activate path variables: `source ./emsdk_env.sh`

#### <a name="Compile"></a> Compile

To build a version that translates with higher speeds on Firefox Nightly browser, follow these instructions:

1. Create a folder where you want to build all the artifacts (`build-wasm` in this case) and compile
```bash
mkdir build-wasm
cd build-wasm
emcmake cmake -DCOMPILE_WASM=on ../
emmake make -j2
```
For more detailed build instructions read the [Bergamot C++ Library](https://browser.mt/docs/main/marian-integration.html) docs.

The wasm artifacts (.js and .wasm files) will be available in the build directory ("build-wasm" in this case).
### Build Wasm

2. Patch generated artifacts to import GEMM library from a separate wasm module
```bash
bash ../wasm/patch-artifacts-import-gemm-module.sh
```
The process for building Wasm is controlled by the `build-wasm.sh` script. This script downloads the Emscripten toolchain and generates the build artifacts in the `build-wasm` folder.

To build a version that runs on all browsers (including Firefox Nightly) but translates slowly, follow these instructions:

1. Create a folder where you want to build all the artifacts (`build-wasm` in this case) and compile
```bash
mkdir build-wasm
cd build-wasm
emcmake cmake -DCOMPILE_WASM=on ../
emmake make -j2
```
```bash
./build-wasm.sh
```

2. Patch generated artifacts to import GEMM library from a separate wasm module
```bash
bash ../wasm/patch-artifacts-import-gemm-module.sh
```
For more information on running the Wasm build, see [Using Bergamot Translator in JavaScript](https://browser.mt/docs/main/wasm-example.html).

#### Recompiling

As long as you don't update any submodule, just follow [Compile](#Compile) steps.\
If you update a submodule, execute following command in repository root folder before executing
[Compile](#Compile) steps.
```bash
git submodule update --init --recursive
```


## How to use

### Using Native version

The build generates a library that can be integrated into any project. All the public header files are located in the `src` folder.\
A short example of how to use the APIs is provided in `app/bergamot.cpp` file.

### Using WASM version
### Using Wasm version

Please follow the `README` inside the `wasm` folder of this repository that demonstrates how to use the translator in JavaScript.
2 changes: 1 addition & 1 deletion build-wasm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cd ${BUILD_DIRECTORY}
emcmake cmake -DCOMPILE_WASM=on ../
emmake make -j2

# 2. Import GEMM library from a separate wasm module
# 2. Import GEMM library from a separate Wasm module
bash ../wasm/patch-artifacts-import-gemm-module.sh

# The artifacts (.js and .wasm files) will be available in the build directory
Expand Down
2 changes: 1 addition & 1 deletion doc/marian-integration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ MKL/OpenBLAS.
Building bergamot-translator
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Web Assembly (WASM) reduces building to only using a subset of
Web Assembly (Wasm) reduces building to only using a subset of
functionalities of marian, the translation library powering
bergamot-translator. When developing bergamot-translator it is important
that the sources added be compatible with marian. Therefore, it is
Expand Down
2 changes: 1 addition & 1 deletion src/tests/blocking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) {
models.push_back(model);
}

/// WASM is one special case where WASM path is being checked, involving translateMultiple and a multi-line feed.
/// Wasm is one special case where Wasm path is being checked, involving translateMultiple and a multi-line feed.
/// Hence we do not bind it at a single input-blob single Response constraint imposed by the TestSuite.
testSuite.run(config.opMode, models);

Expand Down
6 changes: 3 additions & 3 deletions src/tests/wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ void wasm(BlockingService &service, std::shared_ptr<TranslationModel> &model) {
std::vector<ResponseOptions> responseOptions;
std::vector<std::string> texts;

// WASM always requires HTML and alignment.
// Wasm always requires HTML and alignment.
// TODO(jerinphilip): Fix this, bring in actual tests.
// responseOptions.HTML = true;
// responseOptions.alignment = true; // Necessary for HTML
Expand Down Expand Up @@ -35,14 +35,14 @@ int main(int argc, char *argv[]) {

for (auto &modelConfigPath : config.modelConfigPaths) {
TranslationModel::Config modelConfig = parseOptionsFromFilePath(modelConfigPath);
// Anything WASM is expected to use the byte-array-loads. So we hard-code grabbing MemoryBundle from FS and use the
// Anything Wasm is expected to use the byte-array-loads. So we hard-code grabbing MemoryBundle from FS and use the
// MemoryBundle capable constructor.
MemoryBundle memoryBundle = getMemoryBundleFromConfig(modelConfig);
std::shared_ptr<TranslationModel> model = std::make_shared<TranslationModel>(modelConfig, std::move(memoryBundle));
models.push_back(model);
}

/// WASM is one special case where WASM path is being checked, involving translateMultiple and a multi-line feed.
/// Wasm is one special case where Wasm path is being checked, involving translateMultiple and a multi-line feed.
/// Hence we do not bind it at a single input-blob single Response constraint imposed by the TestSuite.
if (config.opMode == "wasm") {
wasm(service, models.front());
Expand Down
2 changes: 1 addition & 1 deletion src/translator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ add_library(bergamot-translator STATIC
xh_scanner.cpp
)
if (USE_WASM_COMPATIBLE_SOURCE)
# Using wasm compatible sources should include this compile definition;
# Using Wasm compatible sources should include this compile definition;
# Has to be done here because we are including marian headers + some sources
# in local repository use these definitions
target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM_COMPATIBLE_SOURCE)
Expand Down
2 changes: 1 addition & 1 deletion wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ add_executable(bergamot-translator-worker
bindings/response_bindings.cpp
)

# Generate version file that can be included in the wasm artifacts
# Generate version file that can be included in the Wasm artifacts
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/project_version.js.in
${CMAKE_CURRENT_BINARY_DIR}/project_version.js @ONLY)

Expand Down
4 changes: 2 additions & 2 deletions wasm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All the instructions below are meant to run from the current directory.

## Using JS APIs

See [node-test.js](./node-test.js) for an annotated example of how to use the WASM module. Most of the code from it can also be used in a browser context.
See [node-test.js](./node-test.js) for an annotated example of how to use the Wasm module. Most of the code from it can also be used in a browser context.

Alternatively refer to the file `test_page/js/worker.js` that demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.

Expand All @@ -30,7 +30,7 @@ Alternatively refer to the file `test_page/js/worker.js` that demonstrates how t
bash start_server.sh ../../build-wasm
```

Provide the folder containing the wasm artifacts as the first argument of `start_server.sh` script (`../../build-wasm` in this case).
Provide the folder containing the Wasm artifacts as the first argument of `start_server.sh` script (`../../build-wasm` in this case).

* Open any browser (tested with latest Chrome/Firefox/Safari)

Expand Down
4 changes: 2 additions & 2 deletions wasm/module/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const translator = new LatencyOptimisedTranslator({
- `workerUrl` - url to `translator-worker.js`. Defaults to `"worker/translator-worker.js"` relative to the path of `translator.js`.
- `downloadTimeout` - Maximum time we're attempting to download model files before failing. Defaults to `60000` or 60 seconds. Set to `0` to disable.
- `cacheSize` - Maximum number of sentences kept in the translation cache (per worker; workers do not share their cache). This is an ideal maximum: because the cache is a hash map, in practice only about one third of it is occupied. If set to `0`, translation cache is disabled (the default).
- `useNativeIntGemm` - Try to link to native IntGEMM implementation when loading the WASM binary. This is only implemented in the privileged extension context of Firefox Nightly. If it fails, it will always fall back to the included implementation. Defaults to `false`.
- `useNativeIntGemm` - Try to link to native IntGEMM implementation when loading the Wasm binary. This is only implemented in the privileged extension context of Firefox Nightly. If it fails, it will always fall back to the included implementation. Defaults to `false`.

### translate()

Expand Down Expand Up @@ -235,4 +235,4 @@ const translator = new BatchTranslator(options, new CustomBacking(options));

# Supported languages

See https://github.com/mozilla/firefox-translations-models#currently-supported-languages. You may need to set the `registryUrl` option to point to the latest release.
See https://github.com/mozilla/firefox-translations-models#currently-supported-languages. You may need to set the `registryUrl` option to point to the latest release.
8 changes: 4 additions & 4 deletions wasm/module/translator.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ export class CancelledError extends Error {}
* call and that are unrecoverable.
* @type {(error: Error)}
*/
this.onerror = this.options.onerror || (err => console.error('WASM Translation Worker error:', err));
this.onerror = this.options.onerror || (err => console.error('Wasm Translation Worker error:', err));
}

/**
Expand Down Expand Up @@ -164,7 +164,7 @@ export class CancelledError extends Error {}
// … and general errors
worker.addEventListener('error', this.onerror.bind(this));

// Await initialisation. This will also nicely error out if the WASM
// Await initialisation. This will also nicely error out if the Wasm
// runtime fails to load.
await call('initialize', this.options);

Expand Down Expand Up @@ -248,7 +248,7 @@ export class CancelledError extends Error {}
/**
* Downloads a translation model and returns a set of
* ArrayBuffers. These can then be passed to a TranslationWorker thread
* to instantiate a TranslationModel inside the WASM vm.
* to instantiate a TranslationModel inside the Wasm vm.
* @param {{from:string, to:string}}
* @param {{signal:AbortSignal?}?}
* @return {Promise<{
Expand Down Expand Up @@ -520,7 +520,7 @@ export class BatchTranslator {
*/
this.batchSize = Math.max(options?.batchSize || 8, 1);

this.onerror = options?.onerror || (err => console.error('WASM Translation Worker error:', err));
this.onerror = options?.onerror || (err => console.error('Wasm Translation Worker error:', err));
}

/**
Expand Down
16 changes: 8 additions & 8 deletions wasm/module/worker/translator-worker.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Wrapper around the dirty bits of Bergamot's WASM bindings.
* Wrapper around the dirty bits of Bergamot's Wasm bindings.
*/

// Global because importScripts is global.
Expand Down Expand Up @@ -170,7 +170,7 @@ class BergamotTranslatorWorker {

/**
* Instantiates a new translation worker with optional options object.
* If this call succeeds, the WASM runtime is loaded and ready.
* If this call succeeds, the Wasm runtime is loaded and ready.
*
* Available options are:
* useNativeIntGemm: {true | false} defaults to false. If true, it will
Expand All @@ -193,7 +193,7 @@ class BergamotTranslatorWorker {
/**
* Tries to load native IntGEMM module for bergamot-translator. If that
* fails because it or any of the expected functions is not available, it
* falls back to using the naive implementations that come with the wasm
* falls back to using the naive implementations that come with the Wasm
* binary itself through `linkFallbackIntGemm()`.
* @param {{env: {memory: WebAssembly.Memory}}} info
* @return {{[method:string]: (...any) => any}}
Expand All @@ -217,7 +217,7 @@ class BergamotTranslatorWorker {
}

/**
* Links intgemm functions that are already available in the wasm binary,
* Links intgemm functions that are already available in the Wasm binary,
* but just exports them under the name that is expected by
* bergamot-translator.
* @param {{env: {memory: WebAssembly.Memory}}} info
Expand All @@ -232,7 +232,7 @@ class BergamotTranslatorWorker {
}

/**
* Internal method. Reads and instantiates the WASM binary. Returns a
* Internal method. Reads and instantiates the Wasm binary. Returns a
* promise for the exported Module object that contains all the classes
* and functions exported by bergamot-translator.
* @return {Promise<BergamotTranslator>}
Expand Down Expand Up @@ -337,7 +337,7 @@ class BergamotTranslatorWorker {
if (buffers.config)
Object.assign(modelConfig, buffers.config);

// WASM marian is only compiled with support for shiftedAll.
// Wasm marian is only compiled with support for shiftedAll.
if (modelConfig['gemm-precision'] === 'int8')
modelConfig['gemm-precision'] = 'int8shiftAll';

Expand Down Expand Up @@ -375,7 +375,7 @@ class BergamotTranslatorWorker {

/**
* Internal function. Copies the data from an ArrayBuffer into memory that
* can be used inside the WASM vm by Marian.
* can be used inside the Wasm vm by Marian.
* @param {{ArrayBuffer}} buffer
* @param {number} alignmentSize
* @return {BergamotTranslator.AlignedMemory}
Expand Down Expand Up @@ -417,7 +417,7 @@ class BergamotTranslatorWorker {
input.delete();
options.delete();

// Convert the Response WASM wrappers into native JavaScript types we
// Convert the Response Wasm wrappers into native JavaScript types we
// can send over the 'wire' (message passing) in the same format as we
// use in bergamot-translator.
const translations = texts.map((_, i) => ({
Expand Down
Loading