diff --git a/src/transformers/generation/tf_logits_process.py b/src/transformers/generation/tf_logits_process.py
index 58824b7b0071b7..91e20fe02f7f4f 100644
--- a/src/transformers/generation/tf_logits_process.py
+++ b/src/transformers/generation/tf_logits_process.py
@@ -581,7 +581,7 @@ def _force_token(generation_idx):
             batch_size = scores.shape[0]
             current_token = self.force_token_array[generation_idx]
 
-            new_scores = tf.ones_like(scores, dtype=scores.dtype) * -float("inf")
+            new_scores = tf.zeros_like(scores, dtype=scores.dtype) + tf.constant([scores.dtype.min])
             indices = tf.stack((tf.range(batch_size), tf.tile([current_token], [batch_size])), axis=1)
             updates = tf.zeros((batch_size,), dtype=scores.dtype)
             new_scores = tf.tensor_scatter_nd_update(new_scores, indices, updates)
diff --git a/tests/generation/test_tf_logits_process.py b/tests/generation/test_tf_logits_process.py
index e87c843d9cb4de..f06f5695b1cef8 100644
--- a/tests/generation/test_tf_logits_process.py
+++ b/tests/generation/test_tf_logits_process.py
@@ -406,7 +406,12 @@ def test_force_tokens_logits_processor(self, use_xla):
 
             non_forced_inds = [i for i in range(vocab_size) if i != force_token_map[cur_len]]
             self.assertTrue(
-                tf.math.reduce_all(tf.math.is_inf(tf.gather(scores, [non_forced_inds], axis=1))),
+                tf.math.reduce_all(
+                    tf.experimental.numpy.isclose(
+                        tf.gather(scores, [non_forced_inds], axis=1),
+                        tf.constant(scores.dtype.min),
+                    )
+                )
             )
 
             # check that if the cur_len is not contained in the force_token_map, the logits are not modified
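
Note on the change (illustrative, not part of the diff): the diff swaps a -inf mask for the dtype's smallest finite value, scores.dtype.min, apparently because finite masks are safer than infinities (arithmetic mixing +inf and -inf produces NaN, and some XLA-compiled paths handle infinities poorly), and because the forced token is then ensured by scattering 0.0 into its column rather than by being the only finite entry. Below is a minimal standalone sketch of that masking pattern; vocab_size and forced_token are hypothetical names introduced for the example, not from the diff.

import tensorflow as tf

vocab_size = 8
forced_token = 3
scores = tf.random.normal((2, vocab_size))  # (batch_size, vocab_size) logits

# Mask everything with the smallest finite value of the dtype, then scatter
# 0.0 into the forced token's column so softmax puts all mass on it.
new_scores = tf.zeros_like(scores) + scores.dtype.min
batch_size = tf.shape(scores)[0]
indices = tf.stack((tf.range(batch_size), tf.tile([forced_token], [batch_size])), axis=1)
new_scores = tf.tensor_scatter_nd_update(new_scores, indices, tf.zeros((batch_size,)))

# Degenerate case that motivates finite masks: softmax over an all -inf row
# is NaN (exp(-inf - max) -> 0/0), while an all dtype.min row stays finite.
probs = tf.nn.softmax(new_scores, axis=-1)
print(tf.argmax(probs, axis=-1).numpy())  # -> [3 3]

Because the mask value is now finite, the test can no longer assert tf.math.is_inf on the non-forced columns; it instead compares them against scores.dtype.min with tf.experimental.numpy.isclose, as the second hunk shows.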