diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
index 0413721ba681dc..68979202d46e6e 100644
--- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
@@ -20,7 +20,7 @@
 import unittest
 
 from transformers import SeamlessM4TConfig, is_speech_available, is_torch_available
-from transformers.testing_utils import is_flaky, require_torch, slow, torch_device
+from transformers.testing_utils import require_torch, slow, torch_device
 from transformers.trainer_utils import set_seed
 from transformers.utils import cached_property
 
@@ -610,9 +610,11 @@ def test_attention_outputs(self):
                 [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
             )
 
-    @is_flaky()
+    @unittest.skip(
+        reason="In training mode, the first speech encoder layer is sometimes skipped. Training is not supported yet, so the test is ignored."
+    )
     def test_retain_grad_hidden_states_attentions(self):
-        super().test_retain_grad_hidden_states_attentions()
+        pass
 
 
 @require_torch