🧘 Replace torch.log(F.sigmoid(log_odds)) with F.logsigmoid(log_odds) (#2274)

Co-authored-by: Quentin Gallouédec <[email protected]>
huggingface · Oct 24, 2024 · 57ba9b9 · 57ba9b9
1 parent 0de75b2
commit 57ba9b9
Showing 1 changed file with 1 addition and 2 deletions.
diff --git a/trl/trainer/orpo_trainer.py b/trl/trainer/orpo_trainer.py
@@ -666,8 +666,7 @@ def odds_ratio_loss(
         log_odds = (policy_chosen_logps - policy_rejected_logps) - (
             torch.log1p(-torch.exp(policy_chosen_logps)) - torch.log1p(-torch.exp(policy_rejected_logps))
         )
-        sig_ratio = F.sigmoid(log_odds)
-        ratio = torch.log(sig_ratio)
+        ratio = F.logsigmoid(log_odds)
         losses = self.beta * ratio
 
         chosen_rewards = self.beta * (policy_chosen_logps.to(self.accelerator.device)).detach()