Drop inplace operation for loss computation with gradient accumulation (#35416)

Fix inplace loss computation
qgallouedec authored Dec 26, 2024
1 parent 24c91f0 commit 4eb17b2
Showing 1 changed file with 1 addition and 1 deletion.
src/transformers/trainer.py (2 changes: 1 addition & 1 deletion)
@@ -3700,7 +3700,7 @@ def training_step(
         else:
             # Finally we need to normalize the loss for reporting
             if num_items_in_batch is None:
-                loss /= self.args.gradient_accumulation_steps
+                loss = loss / self.args.gradient_accumulation_steps

             self.accelerator.backward(loss, **kwargs)

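For context, a minimal standalone sketch (not part of the commit) of why the out-of-place division is the safer choice: PyTorch raises a RuntimeError when an in-place operation is applied to a leaf tensor that requires gradients, whereas loss = loss / n creates a new tensor and leaves the autograd graph intact. The toy leaf tensor below is an assumption for illustration; the real training loss in training_step is a model output rather than a leaf tensor.

# Illustrative only: a toy leaf tensor standing in for the training loss.
import torch

gradient_accumulation_steps = 4

# In-place division on a leaf tensor that requires grad raises an error.
loss = torch.tensor(2.0, requires_grad=True)
try:
    loss /= gradient_accumulation_steps
except RuntimeError as err:
    print(f"in-place division failed: {err}")

# Out-of-place division builds a new node in the autograd graph instead.
loss = torch.tensor(2.0, requires_grad=True)
scaled_loss = loss / gradient_accumulation_steps
scaled_loss.backward()
print(loss.grad)  # tensor(0.2500): d(loss / 4) / d(loss)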
