diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index fa508a35077038..74e484acacde3f 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -2465,7 +2465,9 @@ def _save_checkpoint(self, model, trial, metrics=None):
 
         # Maybe delete some older checkpoints.
         if self.args.should_save:
-            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
+            # Solely rely on numerical checkpoint id for rotation.
+            # mtime is not reliable especially on some fuse fs in cloud environments.
+            self._rotate_checkpoints(use_mtime=False, output_dir=run_dir)
 
         self.args.distributed_state.wait_for_everyone()
 
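For context: with use_mtime=False, checkpoint rotation orders checkpoints by the numeric global-step suffix in their directory names (checkpoint-500, checkpoint-1000, ...) instead of by filesystem mtime, which can be stale or coarse-grained on FUSE-backed cloud mounts. Below is a minimal standalone sketch of that ordering logic; sort_checkpoints_by_step is a hypothetical helper, not the library's actual Trainer._sorted_checkpoints.

    import re
    from pathlib import Path

    # Hypothetical helper illustrating numeric-id ordering; a sketch of the
    # idea, not the exact transformers implementation.
    def sort_checkpoints_by_step(output_dir: str, prefix: str = "checkpoint") -> list[Path]:
        """Order checkpoint dirs by the global step encoded in their names.

        Unlike mtime-based ordering, this stays stable on filesystems that
        report unreliable timestamps (e.g. some FUSE mounts in the cloud).
        """
        pattern = re.compile(rf"{re.escape(prefix)}-(\d+)$")
        candidates = []
        for path in Path(output_dir).glob(f"{prefix}-*"):
            match = pattern.search(path.name)
            if path.is_dir() and match:
                candidates.append((int(match.group(1)), path))
        # Oldest (smallest step) first, so rotation deletes from the front.
        return [path for _, path in sorted(candidates)]

Rotation would then drop checkpoints from the front of this sorted list until only save_total_limit of them remain.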