From 5346db168481640d1ce18f464470b102993049e7 Mon Sep 17 00:00:00 2001
From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
Date: Tue, 6 Feb 2024 11:25:44 +0530
Subject: [PATCH] Raise error when using `save_only_model` with
 `load_best_model_at_end` for DeepSpeed/FSDP (#28866)

* Raise error when using `save_only_model` with `load_best_model_at_end` for DeepSpeed/FSDP

* Update trainer.py
---
 src/transformers/trainer.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 74e484acacde3f..c71cf9d7ad1f37 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -4054,6 +4054,15 @@ def create_accelerator_and_postprocess(self):
         if self.is_deepspeed_enabled and getattr(self.args, "hf_deepspeed_config", None) is None:
             self.propagate_args_to_deepspeed()
 
+        # `save_only_model` can't be used with DeepSpeed/FSDP along with `load_best_model_at_end`
+        if (
+            self.args.save_only_model
+            and (self.is_deepspeed_enabled or self.is_fsdp_enabled)
+            and self.args.load_best_model_at_end
+        ):
+            wrapper = "DeepSpeed" if self.is_deepspeed_enabled else "FSDP"
+            raise ValueError(f"{wrapper} can't be used with `save_only_model` along with `load_best_model_at_end`.")
+
     def propagate_args_to_deepspeed(self, auto_find_batch_size=False):
         """
         Sets values in the deepspeed plugin based on the Trainer args
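
For context, a minimal sketch (not part of the patch) of an argument combination the new guard rejects. The parameter names below are real `TrainingArguments` fields as of this patch; the output path, step counts, and the assumption of an FSDP-capable launch (e.g. via `accelerate launch`) are illustrative placeholders.

# Sketch of a configuration the new check in create_accelerator_and_postprocess
# rejects. Assumes a multi-GPU FSDP launch; paths and step values are placeholders.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",              # placeholder path
    fsdp="full_shard",             # enables FSDP, so trainer.is_fsdp_enabled is True
    save_only_model=True,          # checkpoints keep model weights only (no optimizer/scheduler/RNG state)
    load_best_model_at_end=True,   # needs a full checkpoint to reload the best model after training
    evaluation_strategy="steps",   # load_best_model_at_end requires matching eval/save strategies
    save_strategy="steps",
    eval_steps=500,
    save_steps=500,
)

# Constructing a Trainer with these args now fails during __init__ (which calls
# create_accelerator_and_postprocess) with:
#   ValueError: FSDP can't be used with `save_only_model` along with `load_best_model_at_end`.

Failing fast here is preferable to the previous behavior, where the incompatibility would only surface at the end of training, when the Trainer tries to restore the best checkpoint from state that `save_only_model` never wrote.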