Merged PR 30482: Fixes for backward compatibility in fine-tuning
This PR fixes fine-tuning a model trained with an older version of Marian by:
- adding the removed option `num-devices` to the list of deprecated options
- checking if `loss-{avg,var}-{slow,fast}` are present in the .progress.yml file (see the sketch below)
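The second fix works because an absent key in a yaml-cpp `YAML::Node` converts to `false`, so a missing field can fall back to a default instead of throwing on `.as<float>()`. A minimal, self-contained sketch of the pattern (illustrative only, not Marian's code, though the field name matches the diff below):

    #include <iostream>
    #include <yaml-cpp/yaml.h>

    int main() {
      // A progress file written by an older Marian version: no loss-* fields.
      YAML::Node config = YAML::Load("epochs: 3\nbatches: 1200\n");

      // An absent key yields an undefined node, which converts to false,
      // so we can substitute a default instead of throwing in .as<float>().
      float lossAvgSlow = config["loss-avg-slow"] ? config["loss-avg-slow"].as<float>() : 0;

      std::cout << lossAvgSlow << "\n";  // prints 0
      return 0;
    }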
Roman Grundkiewicz committed Jul 27, 2023
1 parent 717d351 commit e383583
Showing 3 changed files with 9 additions and 4 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-v1.12.10
+v1.12.11
1 change: 1 addition & 0 deletions src/common/cli_wrapper.cpp
@@ -13,6 +13,7 @@ namespace cli {
 const std::unordered_set<std::string> DEPRECATED_OPTIONS = {
   "version",
   "special-vocab",
+  "num-devices",
   // @TODO: uncomment once we actually deprecate them.
   // "after-batches",
   // "after-epochs"
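For context, a deprecated-options set like this is typically consulted when validating the keys of a saved config, so that options written by older Marian versions are ignored with a warning rather than rejected as unknown. A hypothetical sketch of that pattern (not Marian's actual validation code):

    #include <iostream>
    #include <string>
    #include <unordered_set>

    static const std::unordered_set<std::string> DEPRECATED_OPTIONS = {
        "version", "special-vocab", "num-devices"};

    // Hypothetical helper: accept a key if it is currently known, or if it is
    // deprecated and can be safely ignored; reject genuinely unknown keys.
    bool acceptKey(const std::unordered_set<std::string>& known, const std::string& key) {
      if(known.count(key))
        return true;
      if(DEPRECATED_OPTIONS.count(key)) {
        std::cerr << "[warning] option '" << key << "' is deprecated and will be ignored\n";
        return true;  // do not abort fine-tuning over a removed option
      }
      return false;
    }

    int main() {
      std::unordered_set<std::string> known = {"after-batches", "after-epochs"};
      std::cout << acceptKey(known, "num-devices") << "\n";   // 1: deprecated, ignored
      std::cout << acceptKey(known, "bogus-option") << "\n";  // 0: unknown, rejected
    }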
10 changes: 7 additions & 3 deletions src/training/training_state.h
@@ -209,6 +209,10 @@ class TrainingState {
   void loadFromString(const std::string& yamlString) {
     YAML::Node config = YAML::Load(yamlString);
 
+    // WARNING! When adding new options to the training state, make sure to
+    // check for their existence when loading from the .progress.yml
+    // file, for backward compatibility.
+
     epochs = config["epochs"].as<size_t>();
     batches = config["batches"].as<size_t>();
     batchesEpoch = config["batches-epoch"].as<size_t>();
@@ -241,9 +245,9 @@ class TrainingState {
     samplesDisp = config["disp-samples"].as<size_t>();
     updatesDisp = config["disp-updates"].as<size_t>();
 
-    lossAvgSlow = config["loss-avg-slow"].as<float>();
-    lossAvgFast = config["loss-avg-fast"].as<float>();
-    lossVarSlow = config["loss-var-slow"].as<float>();
+    lossAvgSlow = config["loss-avg-slow"] ? config["loss-avg-slow"].as<float>() : 0;
+    lossAvgFast = config["loss-avg-fast"] ? config["loss-avg-fast"].as<float>() : 0;
+    lossVarSlow = config["loss-var-slow"] ? config["loss-var-slow"].as<float>() : 0;
 
     gradientNormAvg = config["gradient-norm-avg"].as<float>();
     gradientNormVar = config["gradient-norm-var"].as<float>();
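As an aside, yaml-cpp also provides a fallback overload that would express these assignments more compactly:

    // Equivalent for absent keys (but it also silences malformed values):
    lossAvgSlow = config["loss-avg-slow"].as<float>(0.f);

The ternary form used in the commit makes the presence check explicit, whereas the fallback overload returns the default on conversion failure as well as on a missing key, so the two are not strictly equivalent.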
