diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index dcb495d95a6e4d..517307c552db58 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -30,6 +30,7 @@
 import transformers
 from transformers import WhisperConfig
 from transformers.testing_utils import (
+    is_flaky,
     is_pt_flax_cross_test,
     require_flash_attn,
     require_torch,
@@ -1539,6 +1540,7 @@ def test_longform_generate_multi_batch(self):
     def test_longform_generate_multi_batch_cond_prev(self):
         self._check_longform_generate_multi_batch(condition_on_prev_tokens=True)
 
+    @is_flaky()  # TODO (joao, sanchit): fails ~9% of the time. Does the original test have the same issue?
     def test_custom_4d_attention_mask(self):
         config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
         model = WhisperForConditionalGeneration(config).to(device=torch_device, dtype=torch.float32)