From 3ab35d99266a2cde54a0afb6457bcc5cd9c24a3a Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:02:18 +0900 Subject: [PATCH 01/19] Add _determine_best_metric and new saving logic. 1. Logic to determine the best logic was separated out from `_save_checkpoint`. 2. In `_maybe_log_save_evaluate`, whether or not a new best metric was achieved is determined after each evaluation, and if the save strategy is "best' then the TrainerControl is updated accordingly. --- src/transformers/trainer.py | 84 +++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 1b13787007e9c3..5c1a000fa95dc1 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -120,6 +120,7 @@ IntervalStrategy, PredictionOutput, RemoveColumnsCollator, + SaveStrategy, TrainerMemoryTracker, TrainOutput, check_target_module_exists, @@ -2995,6 +2996,13 @@ def _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, igno metrics = None if self.control.should_evaluate: metrics = self._evaluate(trial, ignore_keys_for_eval) + new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) + + if self.args.save_strategy == SaveStrategy.BEST: + if new_best_metric: + self.control.should_save = True + else: + self.control_should_save = False if self.control.should_save: self._save_checkpoint(model, trial, metrics=metrics) @@ -3074,7 +3082,54 @@ def _load_rng_state(self, checkpoint): "\nThis won't yield the same results as if the training had not been interrupted." ) - def _save_checkpoint(self, model, trial, metrics=None): + def _determine_best_metric(self, metrics, trial): + """ + Determine if the model should be saved based on the evaluation metrics. + If args.metric_for_best_model is not set, the loss is used. + + Returns: + bool: True if a new best metric was found, else False + """ + new_best_metric = False + + if self.args.metric_for_best_model is not None: + metric_to_check = self.args.metric_for_best_model + + if not metric_to_check.startswith("eval_"): + metric_to_check = f"eval_{metric_to_check}" + + try: + metric_value = metrics[metric_to_check] + except KeyError as exc: + raise KeyError( + f"The `metric_for_best_model` training argument is set to '{metric_to_check}', which is not found in the evaluation metrics. " + f"The available evaluation metrics are: {list(metrics.keys())}. Consider changing the `metric_for_best_model` via the TrainingArguments." + ) from exc + + if self.args.greater_is_better: + operator = np.greater + else: + operator = np.less + else: + metric_value = metrics["eval_loss"] + operator = np.less + + if ( + self.state.best_metric is None + or operator(metric_value, self.state.best_metric) + ): + run_dir = self._get_output_dir(trial=trial) + checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}" + output_dir = os.path.join(run_dir, checkpoint_folder) + + self.state.best_metric = metric_value + self.state.best_model_checkpoint = output_dir + + new_best_metric = True + + return new_best_metric + + def _save_checkpoint(self, model, trial): # In all cases, including ddp/dp/deepspeed, self.model is always a reference to the model we # want to save except FullyShardedDDP. # assert unwrap_model(model) is self.model, "internal model should be a reference to self.model" @@ -3095,31 +3150,6 @@ def _save_checkpoint(self, model, trial, metrics=None): # Save RNG state self._save_rng_state(output_dir) - # Determine the new best metric / best model checkpoint - if metrics is not None and self.args.metric_for_best_model is not None: - metric_to_check = self.args.metric_for_best_model - if not metric_to_check.startswith("eval_"): - metric_to_check = f"eval_{metric_to_check}" - try: - metric_value = metrics[metric_to_check] - except KeyError as exc: - raise KeyError( - f"The `metric_for_best_model` training argument is set to '{metric_to_check}', " - f"which is not found in the evaluation metrics. " - f"The available evaluation metrics are: {list(metrics.keys())}. " - f"Please ensure that the `compute_metrics` function returns a dictionary that includes '{metric_to_check}' or " - f"consider changing the `metric_for_best_model` via the TrainingArguments." - ) from exc - - operator = np.greater if self.args.greater_is_better else np.less - if ( - self.state.best_metric is None - or self.state.best_model_checkpoint is None - or operator(metric_value, self.state.best_metric) - ): - self.state.best_metric = metric_value - self.state.best_model_checkpoint = output_dir - # Save the Trainer state if self.args.should_save: # Update `ExportableState` callbacks and `TrainerControl` state to where we are currently @@ -4540,7 +4570,7 @@ def _push_from_checkpoint(self, checkpoint_folder): # Same for the training arguments torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) - if self.args.save_strategy == IntervalStrategy.STEPS: + if self.args.save_strategy == SaveStrategy.STEPS: commit_message = f"Training in progress, step {self.state.global_step}" else: commit_message = f"Training in progress, epoch {int(self.state.epoch)}" From 47ec1dd88479a57f0b8c8157737bd2c2e93dc3c9 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:04:34 +0900 Subject: [PATCH 02/19] Added SaveStrategy. Same as IntervalStrategy, but with a new attribute called BEST. --- src/transformers/trainer_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index 02c298cf7d2e65..42088cd730628d 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -227,6 +227,13 @@ class IntervalStrategy(ExplicitEnum): EPOCH = "epoch" +class SaveStrategy(ExplicitEnum): + NO = "no" + STEPS = "steps" + EPOCH = "epoch" + BEST = "best" + + class EvaluationStrategy(ExplicitEnum): NO = "no" STEPS = "steps" From 4e3c8bcc6c6e8d1083ff5acc47f26437c67d4d88 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:05:20 +0900 Subject: [PATCH 03/19] IntervalStrategy -> SaveStrategy --- src/transformers/trainer_callback.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 405874acf8f4c4..56f8baef835241 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -24,7 +24,7 @@ import numpy as np from tqdm.auto import tqdm -from .trainer_utils import IntervalStrategy, has_length +from .trainer_utils import IntervalStrategy, SaveStrategy, has_length from .training_args import TrainingArguments from .utils import logging @@ -555,7 +555,7 @@ def on_step_end(self, args: TrainingArguments, state: TrainerState, control: Tra # Save if ( - args.save_strategy == IntervalStrategy.STEPS + args.save_strategy == SaveStrategy.STEPS and state.save_steps > 0 and state.global_step % state.save_steps == 0 ): @@ -565,7 +565,7 @@ def on_step_end(self, args: TrainingArguments, state: TrainerState, control: Tra if state.global_step >= state.max_steps: control.should_training_stop = True # Save the model at the end if we have a save strategy - if args.save_strategy != IntervalStrategy.NO: + if args.save_strategy != SaveStrategy.NO: control.should_save = True return control @@ -580,7 +580,7 @@ def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: Tr control.should_evaluate = True # Save - if args.save_strategy == IntervalStrategy.EPOCH: + if args.save_strategy == SaveStrategy.EPOCH: control.should_save = True return control From 110754027a049539bd5f3198f63d80e7e2c9f3e9 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:06:05 +0900 Subject: [PATCH 04/19] IntervalStratgy -> SaveStrategy for save_strat. --- src/transformers/training_args.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 485610dd9baa28..ae4649b8fec4fb 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -33,6 +33,7 @@ FSDPOption, HubStrategy, IntervalStrategy, + SaveStrategy, SchedulerType, ) from .utils import ( From 4528be611dc18be471c746fe44fac1a90f794382 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:09:49 +0900 Subject: [PATCH 05/19] Interval -> Save in docstring. --- src/transformers/training_args_tf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/training_args_tf.py b/src/transformers/training_args_tf.py index 9df53c3f1d6161..3716a78879d501 100644 --- a/src/transformers/training_args_tf.py +++ b/src/transformers/training_args_tf.py @@ -114,7 +114,7 @@ class TFTrainingArguments(TrainingArguments): Whether to log and evaluate the first `global_step` or not. logging_steps (`int`, *optional*, defaults to 500): Number of update steps between two logs if `logging_strategy="steps"`. - save_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`): + save_strategy (`str` or [`~trainer_utils.SaveStrategy`], *optional*, defaults to `"steps"`): The checkpoint save strategy to adopt during training. Possible values are: - `"no"`: No save is done during training. From 9ec07f5759e53107e1c9d0ac9ddf04eb66563318 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:10:16 +0900 Subject: [PATCH 06/19] Updated docstring for save_strategy. --- src/transformers/training_args.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index ae4649b8fec4fb..2ce922a0af7455 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -350,12 +350,13 @@ class TrainingArguments: - save_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`): + save_strategy (`str` or [`~trainer_utils.SaveStrategy`], *optional*, defaults to `"steps"`): The checkpoint save strategy to adopt during training. Possible values are: - `"no"`: No save is done during training. - `"epoch"`: Save is done at the end of each epoch. - `"steps"`: Save is done every `save_steps`. + - `"best"`: Save is done every time a new best metric is achieved. If `"epoch"` or `"steps"` is chosen, saving will also be performed at the very end of training, always. @@ -963,7 +964,7 @@ class TrainingArguments: }, ) logging_nan_inf_filter: bool = field(default=True, metadata={"help": "Filter nan and inf losses for logging."}) - save_strategy: Union[IntervalStrategy, str] = field( + save_strategy: Union[SaveStrategy, str] = field( default="steps", metadata={"help": "The checkpoint save strategy to use."}, ) @@ -1581,7 +1582,7 @@ def __post_init__(self): self.eval_strategy = IntervalStrategy(self.eval_strategy) self.logging_strategy = IntervalStrategy(self.logging_strategy) - self.save_strategy = IntervalStrategy(self.save_strategy) + self.save_strategy = SaveStrategy(self.save_strategy) self.hub_strategy = HubStrategy(self.hub_strategy) self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type) @@ -1617,7 +1618,7 @@ def __post_init__(self): if self.eval_steps != int(self.eval_steps): raise ValueError(f"--eval_steps must be an integer if bigger than 1: {self.eval_steps}") self.eval_steps = int(self.eval_steps) - if self.save_strategy == IntervalStrategy.STEPS and self.save_steps > 1: + if self.save_strategy == SaveStrategy.STEPS and self.save_steps > 1: if self.save_steps != int(self.save_steps): raise ValueError(f"--save_steps must be an integer if bigger than 1: {self.save_steps}") self.save_steps = int(self.save_steps) @@ -2751,8 +2752,8 @@ def set_save( 100 ``` """ - self.save_strategy = IntervalStrategy(strategy) - if self.save_strategy == IntervalStrategy.STEPS and steps == 0: + self.save_strategy = SaveStrategy(strategy) + if self.save_strategy == SaveStrategy.STEPS and steps == 0: raise ValueError("Setting `strategy` as 'steps' requires a positive value for `steps`.") self.save_steps = steps self.save_total_limit = total_limit From a494a54b191a4b3a1c277f8318f37a8d1a3760e7 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 14:14:25 +0900 Subject: [PATCH 07/19] Added SaveStrategy and made according changes. `save_strategy` previously followed `IntervalStrategy` but now follows `SaveStrategy`. Changes were made accordingly to the code and the docstring. --- src/transformers/training_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 2ce922a0af7455..c98e8bc41b924d 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -356,7 +356,7 @@ class TrainingArguments: - `"no"`: No save is done during training. - `"epoch"`: Save is done at the end of each epoch. - `"steps"`: Save is done every `save_steps`. - - `"best"`: Save is done every time a new best metric is achieved. + - `"best"`: Save is done whenever a new `best_metric` is achieved. If `"epoch"` or `"steps"` is chosen, saving will also be performed at the very end of training, always. From 6831cd456de3afef23986c31384548e650adba78 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:12:56 +0900 Subject: [PATCH 08/19] Changes from `make fixup`. --- src/transformers/trainer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 5c1a000fa95dc1..8c090ef4187906 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -117,7 +117,6 @@ EvalPrediction, HPSearchBackend, HubStrategy, - IntervalStrategy, PredictionOutput, RemoveColumnsCollator, SaveStrategy, @@ -3114,10 +3113,7 @@ def _determine_best_metric(self, metrics, trial): metric_value = metrics["eval_loss"] operator = np.less - if ( - self.state.best_metric is None - or operator(metric_value, self.state.best_metric) - ): + if self.state.best_metric is None or operator(metric_value, self.state.best_metric): run_dir = self._get_output_dir(trial=trial) checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}" output_dir = os.path.join(run_dir, checkpoint_folder) From 45893201f12b1adf769e2178472d0babd416df34 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 6 Jul 2024 22:23:30 +0900 Subject: [PATCH 09/19] Removed redundant metrics argument. --- src/transformers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 8c090ef4187906..ad1e7ae044b54b 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3004,7 +3004,7 @@ def _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, igno self.control_should_save = False if self.control.should_save: - self._save_checkpoint(model, trial, metrics=metrics) + self._save_checkpoint(model, trial) self.control = self.callback_handler.on_save(self.args, self.state, self.control) def _load_rng_state(self, checkpoint): From 1ab8356df370ec74a4a28731722044d75f005e8d Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sun, 11 Aug 2024 16:44:42 +0900 Subject: [PATCH 10/19] Added new test_save_best_checkpoint test. 1. Checks for both cases where `metric_for_best_model` is explicitly provided and when it's not provided. 2. The first case should have two checkpoints saved, whereas the second should have three saved. --- tests/trainer/test_trainer.py | 69 +++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 5c03355785d2b5..ae30cb28ce45cc 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -4041,6 +4041,75 @@ def test_trainer_saves_processor(self): reloaded_tokenizer(test_sentence, padding="max_length").input_ids, ) + def test_save_best_checkpoint(self): + freq = int(64 / self.batch_size) + total = int(self.n_epochs * 64 / self.batch_size) + + # Case 1: Metric name provided. + with tempfile.TemporaryDirectory() as tmpdir: + trainer = get_regression_trainer( + a=1.5, + b=2.5, + output_dir=tmpdir, + learning_rate=0.1, + eval_strategy="epoch", + save_strategy="best", + metric_for_best_model="accuracy", + compute_metrics=AlmostAccuracy(), + ) + self.assertTrue(trainer.args.metric_for_best_model == "accuracy") + + # Patch the `_evaluate` method to control the metrics returned during evaluation + with patch.object( + trainer, + "_evaluate", + side_effect=[ + {"eval_loss": 0.03, "eval_accuracy": 0.60, "epoch": 1.0}, + {"eval_loss": 0.02, "eval_accuracy": 0.65, "epoch": 2.0}, + {"eval_loss": 0.01, "eval_accuracy": 0.64, "epoch": 3.0}, + ], + ): + trainer.train() + + self.assertEqual(len(os.listdir(tmpdir)), 2) + self.check_saved_checkpoints( + output_dir=tmpdir, + freq=freq, + total=total, + ) + + # Case 2: Metric name not provided, default to loss. + with tempfile.TemporaryDirectory() as tmpdir: + trainer = get_regression_trainer( + a=1.5, + b=2.5, + output_dir=tmpdir, + learning_rate=0.1, + eval_strategy="epoch", + save_strategy="best", + compute_metrics=AlmostAccuracy(), + ) + self.assertTrue(trainer.args.metric_for_best_model == "loss") + + # Patch the `_evaluate` method to control the metrics returned during evaluation + with patch.object( + trainer, + "_evaluate", + side_effect=[ + {"eval_loss": 0.03, "eval_accuracy": 0.60, "epoch": 1.0}, + {"eval_loss": 0.02, "eval_accuracy": 0.65, "epoch": 2.0}, + {"eval_loss": 0.01, "eval_accuracy": 0.64, "epoch": 3.0}, + ], + ): + trainer.train() + + self.assertEqual(len(os.listdir(tmpdir)), 3) + self.check_saved_checkpoints( + output_dir=tmpdir, + freq=freq, + total=total, + ) + @require_torch @is_staging_test From b711c257ec6debef49786e778ccaf359f8429e42 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sun, 11 Aug 2024 16:45:49 +0900 Subject: [PATCH 11/19] Changed should_training_end saving logic. The Trainer saves a checkpoints at the end of training by default as long as `save_strategy != SaveStrategy.NO`. This condition was modified to include `SaveStrategy.BEST` because it would be counterintuitive that we'd only want the best checkpoint to be saved but the last one is as well. --- src/transformers/trainer_callback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index 56f8baef835241..ce9f2a26732c2e 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -565,7 +565,7 @@ def on_step_end(self, args: TrainingArguments, state: TrainerState, control: Tra if state.global_step >= state.max_steps: control.should_training_stop = True # Save the model at the end if we have a save strategy - if args.save_strategy != SaveStrategy.NO: + if args.save_strategy not in [SaveStrategy.NO, SaveStrategy.BEST]: control.should_save = True return control From 0956ad2979b2e4652d455c658aa064b0a4c70348 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sun, 11 Aug 2024 16:46:55 +0900 Subject: [PATCH 12/19] `args.metric_for_best_model` default to loss. --- src/transformers/trainer.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index ad1e7ae044b54b..c5d85594db6432 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -767,6 +767,9 @@ def tokenizer(self, processing_class) -> None: ) self.processing_class = processing_class + if not self.args.metric_for_best_model: + self.args.metric_for_best_model = "loss" + def _activate_neftune(self, model): r""" Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: @@ -2998,10 +3001,7 @@ def _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, igno new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) if self.args.save_strategy == SaveStrategy.BEST: - if new_best_metric: - self.control.should_save = True - else: - self.control_should_save = False + self.control.should_save = new_best_metric if self.control.should_save: self._save_checkpoint(model, trial) @@ -3105,10 +3105,7 @@ def _determine_best_metric(self, metrics, trial): f"The available evaluation metrics are: {list(metrics.keys())}. Consider changing the `metric_for_best_model` via the TrainingArguments." ) from exc - if self.args.greater_is_better: - operator = np.greater - else: - operator = np.less + operator = np.greater if self.args.greater_is_better else np.less else: metric_value = metrics["eval_loss"] operator = np.less From df5a0356934c87e1fb90c97084732126f7ad54db Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sun, 11 Aug 2024 17:15:04 +0900 Subject: [PATCH 13/19] Undo metric_for_best_model update. --- src/transformers/trainer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index c5d85594db6432..652061a37716ad 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -767,9 +767,6 @@ def tokenizer(self, processing_class) -> None: ) self.processing_class = processing_class - if not self.args.metric_for_best_model: - self.args.metric_for_best_model = "loss" - def _activate_neftune(self, model): r""" Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: From bfbe2e0feecdbbda658a8c0a6ffa61df36adb462 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sun, 11 Aug 2024 17:15:34 +0900 Subject: [PATCH 14/19] Remove checking metric_for_best_model. --- tests/trainer/test_trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index ae30cb28ce45cc..e47440650921cb 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -4089,7 +4089,6 @@ def test_save_best_checkpoint(self): save_strategy="best", compute_metrics=AlmostAccuracy(), ) - self.assertTrue(trainer.args.metric_for_best_model == "loss") # Patch the `_evaluate` method to control the metrics returned during evaluation with patch.object( From ff932cd0651e81cfe0b10a38db9fdd08cce6eb2b Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 19 Oct 2024 13:05:22 +0900 Subject: [PATCH 15/19] Added test cases for loss and no metric. --- tests/trainer/test_trainer.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index e47440650921cb..cc578b50264776 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -25,6 +25,7 @@ import sys import tempfile import unittest +import warnings from functools import partial from itertools import product from pathlib import Path @@ -4045,7 +4046,7 @@ def test_save_best_checkpoint(self): freq = int(64 / self.batch_size) total = int(self.n_epochs * 64 / self.batch_size) - # Case 1: Metric name provided. + # Case 1: args.metric_for_best_model == "accuracy". with tempfile.TemporaryDirectory() as tmpdir: trainer = get_regression_trainer( a=1.5, @@ -4059,7 +4060,6 @@ def test_save_best_checkpoint(self): ) self.assertTrue(trainer.args.metric_for_best_model == "accuracy") - # Patch the `_evaluate` method to control the metrics returned during evaluation with patch.object( trainer, "_evaluate", @@ -4078,7 +4078,7 @@ def test_save_best_checkpoint(self): total=total, ) - # Case 2: Metric name not provided, default to loss. + # Case 2: args.metric_for_best_model == "loss". with tempfile.TemporaryDirectory() as tmpdir: trainer = get_regression_trainer( a=1.5, @@ -4087,28 +4087,44 @@ def test_save_best_checkpoint(self): learning_rate=0.1, eval_strategy="epoch", save_strategy="best", + metric_for_best_model="loss", compute_metrics=AlmostAccuracy(), ) + self.assertTrue(trainer.args.metric_for_best_model == "loss") - # Patch the `_evaluate` method to control the metrics returned during evaluation with patch.object( trainer, "_evaluate", side_effect=[ {"eval_loss": 0.03, "eval_accuracy": 0.60, "epoch": 1.0}, {"eval_loss": 0.02, "eval_accuracy": 0.65, "epoch": 2.0}, - {"eval_loss": 0.01, "eval_accuracy": 0.64, "epoch": 3.0}, + {"eval_loss": 0.03, "eval_accuracy": 0.66, "epoch": 3.0}, ], ): trainer.train() - self.assertEqual(len(os.listdir(tmpdir)), 3) + self.assertEqual(len(os.listdir(tmpdir)), 2) self.check_saved_checkpoints( output_dir=tmpdir, freq=freq, total=total, ) + # Case 3: Metric name not provided; throw error. + with tempfile.TemporaryDirectory() as tmpdir: + with self.assertRaises(ValueError) as context: + trainer = get_regression_trainer( + a=1.5, + b=2.5, + output_dir=tmpdir, + learning_rate=0.1, + eval_strategy="epoch", + save_strategy="best", + compute_metrics=AlmostAccuracy(), + ) + + self.assertIn("`args.metric_for_best_model` must be provided", str(context.exception)) + @require_torch @is_staging_test From f6e272511b638588cc66641a6bba311f0522cafa Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 19 Oct 2024 13:06:20 +0900 Subject: [PATCH 16/19] Added error for metric and changed default best_metric. --- src/transformers/trainer.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 652061a37716ad..4b31b992022e3e 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -419,6 +419,12 @@ def __init__( raise ValueError( f"You have set `args.eval_strategy` to {args.eval_strategy} but you didn't pass an `eval_dataset` to `Trainer`. Either set `args.eval_strategy` to `no` or pass an `eval_dataset`. " ) + if args.save_strategy == SaveStrategy.BEST or args.load_best_model_at_end: + if args.metric_for_best_model is None: + raise ValueError( + "`args.metric_for_best_model` must be provided when using 'best' save_strategy or if `args.load_best_model_at_end` is set to `True`." + ) + self.args = args self.compute_loss_func = compute_loss_func # Seed must be set before instantiating the model when using model @@ -3103,19 +3109,19 @@ def _determine_best_metric(self, metrics, trial): ) from exc operator = np.greater if self.args.greater_is_better else np.less - else: - metric_value = metrics["eval_loss"] - operator = np.less - if self.state.best_metric is None or operator(metric_value, self.state.best_metric): - run_dir = self._get_output_dir(trial=trial) - checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}" - output_dir = os.path.join(run_dir, checkpoint_folder) + if self.state.best_metric is None: + self.state.best_metric = float("-inf") if self.args.greater_is_better else float("inf") + + if operator(metric_value, self.state.best_metric): + run_dir = self._get_output_dir(trial=trial) + checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}" + output_dir = os.path.join(run_dir, checkpoint_folder) - self.state.best_metric = metric_value - self.state.best_model_checkpoint = output_dir + self.state.best_metric = metric_value + self.state.best_model_checkpoint = output_dir - new_best_metric = True + new_best_metric = True return new_best_metric From aa44b305f5cd9e6e2386169592b6eb9a437a5798 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 19 Oct 2024 13:12:42 +0900 Subject: [PATCH 17/19] Removed unused import. --- tests/trainer/test_trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index cc578b50264776..b6fe807fa4961a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -25,7 +25,6 @@ import sys import tempfile import unittest -import warnings from functools import partial from itertools import product from pathlib import Path From fbc990dc85904b4ad7f3779a5d16f58a16722fd8 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Fri, 25 Oct 2024 23:51:34 +0900 Subject: [PATCH 18/19] `new_best_metric` -> `is_new_best_metric` Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> --- src/transformers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 4b31b992022e3e..229bde0d6e28b8 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3001,7 +3001,7 @@ def _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, igno metrics = None if self.control.should_evaluate: metrics = self._evaluate(trial, ignore_keys_for_eval) - new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) + is_new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) if self.args.save_strategy == SaveStrategy.BEST: self.control.should_save = new_best_metric From 38216fe6ea0ce5c9671debe333d68c56eb032107 Mon Sep 17 00:00:00 2001 From: "Sean (Seok-Won) Yi" Date: Sat, 26 Oct 2024 00:48:49 +0900 Subject: [PATCH 19/19] Applied `is_new_best_metric` to all. Changes were made for consistency and also to fix a potential bug. --- src/transformers/trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 229bde0d6e28b8..6fc4dc22084ba3 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3004,7 +3004,7 @@ def _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, igno is_new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) if self.args.save_strategy == SaveStrategy.BEST: - self.control.should_save = new_best_metric + self.control.should_save = is_new_best_metric if self.control.should_save: self._save_checkpoint(model, trial) @@ -3092,7 +3092,7 @@ def _determine_best_metric(self, metrics, trial): Returns: bool: True if a new best metric was found, else False """ - new_best_metric = False + is_new_best_metric = False if self.args.metric_for_best_model is not None: metric_to_check = self.args.metric_for_best_model @@ -3121,9 +3121,9 @@ def _determine_best_metric(self, metrics, trial): self.state.best_metric = metric_value self.state.best_model_checkpoint = output_dir - new_best_metric = True + is_new_best_metric = True - return new_best_metric + return is_new_best_metric def _save_checkpoint(self, model, trial): # In all cases, including ddp/dp/deepspeed, self.model is always a reference to the model we