From 90d2459e5d782eb91fe2f65519d12d690740b92f Mon Sep 17 00:00:00 2001
From: Nilesh Kokane
Date: Tue, 6 Feb 2024 15:12:36 +0530
Subject: [PATCH] Resolved conflict by rebase

---
 src/transformers/models/blip/modeling_tf_blip_text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py
index 19d8bc9b6ecfa0..c30c72fe50fa4b 100644
--- a/src/transformers/models/blip/modeling_tf_blip_text.py
+++ b/src/transformers/models/blip/modeling_tf_blip_text.py
@@ -1063,7 +1063,7 @@ def call(
             # Keras won't give us label smoothing for sparse CE, so we de-sparsify things here
             # Use relu to clamp masked labels at 0 to avoid NaN (we will be zeroing those out later anyway)
             one_hot_labels = tf.one_hot(tf.nn.relu(labels), depth=self.config.vocab_size, dtype=tf.float32)
-            loss_fct = keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1, reduction="none")
+            loss_fct = keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.0, reduction="none")
             masked_positions = tf.cast(tf.not_equal(labels, -100), dtype=tf.float32)
             lm_loss = loss_fct(one_hot_labels, shifted_prediction_scores)
             lm_loss *= masked_positions
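
For reference, below is a minimal standalone sketch of the masked-loss pattern the hunk touches: labels of -100 mark positions to ignore, tf.nn.relu clamps them to 0 so tf.one_hot produces a valid dummy row instead of feeding garbage into the loss, the dense CategoricalCrossentropy (rather than the sparse variant) is what makes label_smoothing available, and the per-token losses are zeroed at the masked positions afterwards. The toy tensors and vocab_size here are illustrative, not taken from the model code.

    # Sketch of the masked categorical cross-entropy pattern in the hunk above.
    # Toy shapes and values; the real model uses config.vocab_size and shifted
    # prediction scores/labels for next-token prediction.
    import tensorflow as tf
    from tensorflow import keras

    vocab_size = 8  # illustrative; stand-in for self.config.vocab_size

    # Two sequences of three tokens each; -100 marks positions to ignore.
    labels = tf.constant([[1, 2, -100], [3, -100, -100]], dtype=tf.int32)
    logits = tf.random.normal((2, 3, vocab_size))  # stand-in for prediction scores

    # Clamp -100 to 0 so tf.one_hot yields a valid row (zeroed out below anyway).
    one_hot_labels = tf.one_hot(tf.nn.relu(labels), depth=vocab_size, dtype=tf.float32)

    # Dense CE supports label_smoothing; reduction="none" keeps per-token losses.
    loss_fct = keras.losses.CategoricalCrossentropy(
        from_logits=True, label_smoothing=0.0, reduction="none"
    )

    # 1.0 where the label is real, 0.0 where it was -100.
    masked_positions = tf.cast(tf.not_equal(labels, -100), dtype=tf.float32)

    lm_loss = loss_fct(one_hot_labels, logits) * masked_positions  # shape (2, 3)

    # Average only over the unmasked tokens.
    mean_loss = tf.reduce_sum(lm_loss) / tf.reduce_sum(masked_positions)
    print(mean_loss)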