From 831bc25d8fdb85768402f772cf65cc3d7872b211 Mon Sep 17 00:00:00 2001
From: David Valente <74915610+DavidAfonsoValente@users.noreply.github.com>
Date: Fri, 1 Mar 2024 18:04:40 +0100
Subject: [PATCH] Correct zero division error in inverse sqrt scheduler (#28982)

* Correct zero division error in inverse sqrt scheduler

* default timescale to 10_000
---
 src/transformers/optimization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py
index b3861b371a2393..65a41d1b1a44f2 100644
--- a/src/transformers/optimization.py
+++ b/src/transformers/optimization.py
@@ -317,7 +317,7 @@ def get_inverse_sqrt_schedule(
     # https://github.com/google-research/big_vision/blob/f071ce68852d56099437004fd70057597a95f6ef/big_vision/utils.py#L930
 
     if timescale is None:
-        timescale = num_warmup_steps
+        timescale = num_warmup_steps or 10_000
 
     lr_lambda = partial(_get_inverse_sqrt_schedule_lr_lambda, num_warmup_steps=num_warmup_steps, timescale=timescale)
     return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)