-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Multiple TranslationModels Implementation (#210)
For outbound translation, we require having multiple models in the inventory at the same time and abstracting out the "how-to-translate" using a model. Reorganization: TranslationModel + Service. The new entity which contains everything required to translate in one direction is `TranslationModel`. The how-to-translate part, either a blocking single-threaded mode of operation or an async multi-threaded mode of operation, is decoupled as `BlockingService` and `AsyncService`. A new regression-test using multiple models in conjunction is added, also serving as a demonstration for using multiple models in Outbound Translation. WASM: WebAssembly, due to the inability to use threads, uses `BlockingService`. Bindings are provided with a new API to work with a Service, and multiple TranslationModels which the client (JS extension) can inventory and maintain. Ownership of a given `TranslationModel` is shared while translations using the model are active in the internal mechanism. Config-Parsing: So far bergamot-translator has been hijacking marian's config-parsing mechanisms. However, in order to support multiple models, it has become impractical to continue this approach and a new config-parsing that is bergamot specific is provisioned for command-line applications constituting tests. The original marian config-parsing tooling is only associated with a subset of `TranslationModel` now. The new config-parsing for the library manages workers and other common options (tentatively). There is a known issue: inefficient placing of workspaces, leading to more memory usage than what's necessary. This is to be fixed trickling down from marian-dev in a later pull request. This PR also brings in BRT changes which fix speed-tests that were broken and also fixes some QE outputs which were different due to not using shortlist.
- Loading branch information
1 parent
63120c1
commit cf541c6
Showing
29 changed files
with
1,068 additions
and
641 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,22 @@ | ||
#include "cli.h" | ||
|
||
int main(int argc, char *argv[]) { | ||
auto cp = marian::bergamot::createConfigParser(); | ||
auto options = cp.parseOptions(argc, argv, true); | ||
const std::string mode = options->get<std::string>("bergamot-mode"); | ||
marian::bergamot::ConfigParser configParser; | ||
configParser.parseArgs(argc, argv); | ||
auto &config = configParser.getConfig(); | ||
using namespace marian::bergamot; | ||
if (mode == "wasm") { | ||
app::wasm(options); | ||
} else if (mode == "native") { | ||
app::native(options); | ||
} else if (mode == "decoder") { | ||
app::decoder(options); | ||
} else { | ||
ABORT("Unknown --mode {}. Use one of: {wasm,native,decoder}", mode); | ||
switch (config.opMode) { | ||
case OpMode::APP_WASM: | ||
app::wasm(config); | ||
break; | ||
case OpMode::APP_NATIVE: | ||
app::native(config); | ||
break; | ||
case OpMode::APP_DECODER: | ||
app::decoder(config); | ||
break; | ||
default: | ||
break; | ||
} | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule bergamot-translator-tests
updated
15 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,45 @@ | ||
|
||
#include "apps.h" | ||
|
||
int main(int argc, char *argv[]) { | ||
auto cp = marian::bergamot::createConfigParser(); | ||
auto options = cp.parseOptions(argc, argv, true); | ||
const std::string mode = options->get<std::string>("bergamot-mode"); | ||
using namespace marian::bergamot; | ||
if (mode == "test-response-source-sentences") { | ||
testapp::annotatedTextSentences(options, /*source=*/true); | ||
} else if (mode == "test-response-target-sentences") { | ||
testapp::annotatedTextSentences(options, /*source=*/false); | ||
} else if (mode == "test-response-source-words") { | ||
testapp::annotatedTextWords(options, /*source=*/true); | ||
} else if (mode == std::string("test-quality-estimator-words")) { | ||
testapp::qualityEstimatorWords(options); | ||
} else if (mode == std::string("test-quality-estimator-scores")) { | ||
testapp::qualityEstimatorScores(options); | ||
} else { | ||
ABORT("Unknown --mode {}. Please run a valid test", mode); | ||
marian::bergamot::ConfigParser configParser; | ||
configParser.parseArgs(argc, argv); | ||
auto &config = configParser.getConfig(); | ||
AsyncService::Config serviceConfig{config.numWorkers}; | ||
AsyncService service(serviceConfig); | ||
std::vector<std::shared_ptr<TranslationModel>> models; | ||
|
||
for (auto &modelConfigPath : config.modelConfigPaths) { | ||
TranslationModel::Config modelConfig = parseOptionsFromFilePath(modelConfigPath); | ||
std::shared_ptr<TranslationModel> model = service.createCompatibleModel(modelConfig); | ||
models.push_back(model); | ||
} | ||
|
||
switch (config.opMode) { | ||
case OpMode::TEST_SOURCE_SENTENCES: | ||
testapp::annotatedTextSentences(service, models.front(), /*source=*/true); | ||
break; | ||
case OpMode::TEST_TARGET_SENTENCES: | ||
testapp::annotatedTextSentences(service, models.front(), /*source=*/false); | ||
break; | ||
case OpMode::TEST_SOURCE_WORDS: | ||
testapp::annotatedTextWords(service, models.front(), /*source=*/true); | ||
break; | ||
case OpMode::TEST_TARGET_WORDS: | ||
testapp::annotatedTextWords(service, models.front(), /*source=*/false); | ||
break; | ||
case OpMode::TEST_FORWARD_BACKWARD_FOR_OUTBOUND: | ||
testapp::forwardAndBackward(service, models); | ||
break; | ||
case OpMode::TEST_QUALITY_ESTIMATOR_WORDS: | ||
testapp::qualityEstimatorWords(service, models.front()); | ||
break; | ||
case OpMode::TEST_QUALITY_ESTIMATOR_SCORES: | ||
testapp::qualityEstimatorScores(service, models.front()); | ||
break; | ||
default: | ||
ABORT("Incompatible op-mode. Choose one of the test modes."); | ||
break; | ||
} | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.