From b22eaf9e135c5fb4b21290f9f17e939531fa0ac1 Mon Sep 17 00:00:00 2001 From: Nilesh Kokane Date: Thu, 25 Jan 2024 21:35:05 +0530 Subject: [PATCH] Replace label-smoothed NLL loss with plain NLL (label_smoothing 0.1 -> 0.0) --- src/transformers/models/blip/modeling_blip_text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/blip/modeling_blip_text.py b/src/transformers/models/blip/modeling_blip_text.py index 353c0f486a5629..1ac95ea17054b2 100644 --- a/src/transformers/models/blip/modeling_blip_text.py +++ b/src/transformers/models/blip/modeling_blip_text.py @@ -889,7 +889,7 @@ def forward( # we are doing next-token prediction; shift prediction scores and input ids by one shifted_prediction_scores = prediction_scores[:, :-1, :].contiguous() labels = labels[:, 1:].contiguous().to(shifted_prediction_scores.device) - loss_fct = CrossEntropyLoss(reduction=reduction, label_smoothing=0.1) + loss_fct = CrossEntropyLoss(reduction=reduction, label_smoothing=0.0) lm_loss = loss_fct(shifted_prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) if reduction == "none": lm_loss = lm_loss.view(prediction_scores.size(0), -1).sum(1)