test 3

ROCm · Oct 10, 2024 · cea9253 · cea9253
1 parent 017b758
commit cea9253
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 7 deletions.
diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py
@@ -381,7 +381,7 @@ def main():
         model_args.config_name if model_args.config_name else model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
-        #use_auth_token=True if model_args.use_auth_token else None,
+        use_auth_token=True if model_args.use_auth_token else None,
         ort=True if training_args.ort else None,
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
@@ -2495,9 +2495,9 @@ def _inner_training_loop(
 
         # add remaining tr_loss
         self._total_loss_scalar += tr_loss.item()
-        #effective_global_step = max(self.state.global_step, 0.001)  # Avoid ZeroDivisionError
-        #train_loss = self._total_loss_scalar / effective_global_step
-        train_loss = self._total_loss_scalar / self.state.global_step
+        effective_global_step = max(self.state.global_step, 0.001)  # Avoid ZeroDivisionError
+        train_loss = self._total_loss_scalar / effective_global_step
+        #train_loss = self._total_loss_scalar / self.state.global_step
 
 
         metrics = speed_metrics("train", start_time, num_samples=num_train_samples, num_steps=self.state.max_steps,num_tokens=num_train_tokens,)

diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
@@ -564,7 +564,6 @@ class TrainingArguments:
             Use [Deepspeed](https://github.com/microsoft/deepspeed). This is an experimental feature and its API may
             evolve in the future. The value is either the location of a DeepSpeed JSON config file (e.g.,
             `ds_config.json`) or an already loaded JSON file as a `dict`.
-<<<<<<< HEAD
 
             <Tip warning={true}>
                 If enabling any Zero-init, make sure that your model is not initialized until
@@ -600,8 +599,6 @@ class TrainingArguments:
                     If `True`, an `Accelerator` or `PartialState` must be initialized. Note that by doing so, this could lead to issues
                     with hyperparameter tuning.
 
-=======
->>>>>>> origin/main_old
         ortmodule (:obj:`bool`, `optional`):
             Use `ORTModule <https://github.com/microsoft/onnxruntime>`__.
         label_smoothing_factor (`float`, *optional*, defaults to 0.0):