From f83c6f1d02fba5e5ced9357b9c9196c76d937af3 Mon Sep 17 00:00:00 2001 From: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Date: Tue, 23 Jul 2024 14:54:38 +0800 Subject: [PATCH] Remove `trust_remote_code` when loading Libri Dummy (#31748) * [whisper integration] use parquet dataset for testing * propagate to others * more propagation * last one --- src/transformers/commands/pt_to_tf.py | 4 +- src/transformers/generation/logits_process.py | 6 +- src/transformers/models/clvp/modeling_clvp.py | 4 +- .../modeling_speech_to_text_2.py | 2 +- .../models/hubert/modeling_hubert.py | 2 +- .../models/hubert/modeling_tf_hubert.py | 4 +- .../modeling_speech_encoder_decoder.py | 2 +- .../speech_to_text/modeling_speech_to_text.py | 4 +- .../modeling_tf_speech_to_text.py | 2 +- .../models/univnet/modeling_univnet.py | 2 +- .../models/wav2vec2/modeling_flax_wav2vec2.py | 6 +- .../models/wav2vec2/modeling_tf_wav2vec2.py | 4 +- .../models/wav2vec2/modeling_wav2vec2.py | 2 +- .../modeling_wav2vec2_conformer.py | 2 +- .../models/whisper/generation_whisper.py | 2 +- .../models/whisper/modeling_flax_whisper.py | 8 +- .../models/whisper/modeling_tf_whisper.py | 6 +- .../models/whisper/modeling_whisper.py | 6 +- ...xtraction_audio_spectrogram_transformer.py | 4 +- .../clap/test_feature_extraction_clap.py | 4 +- tests/models/clap/test_modeling_clap.py | 16 +-- .../clvp/test_feature_extraction_clvp.py | 4 +- tests/models/clvp/test_modeling_clvp.py | 8 +- .../data2vec/test_modeling_data2vec_audio.py | 4 +- .../test_feature_extraction_encodec.py | 4 +- tests/models/encodec/test_modeling_encodec.py | 12 +-- tests/models/hubert/test_modeling_hubert.py | 4 +- .../models/hubert/test_modeling_tf_hubert.py | 4 +- .../test_feature_extraction_pop2piano.py | 4 +- .../pop2piano/test_processor_pop2piano.py | 4 +- .../test_feature_extraction_seamless_m4t.py | 4 +- tests/models/sew/test_modeling_sew.py | 4 +- tests/models/sew_d/test_modeling_sew_d.py | 4 +- .../test_feature_extraction_speech_to_text.py | 4 +- .../test_modeling_speech_to_text.py | 4 +- .../test_modeling_tf_speech_to_text.py | 4 +- .../test_feature_extraction_speecht5.py | 4 +- .../models/speecht5/test_modeling_speecht5.py | 8 +- .../unispeech/test_modeling_unispeech.py | 4 +- .../test_modeling_unispeech_sat.py | 4 +- .../test_feature_extraction_univnet.py | 4 +- tests/models/univnet/test_modeling_univnet.py | 4 +- .../wav2vec2/test_modeling_flax_wav2vec2.py | 4 +- .../wav2vec2/test_modeling_tf_wav2vec2.py | 4 +- .../models/wav2vec2/test_modeling_wav2vec2.py | 4 +- .../test_modeling_wav2vec2_bert.py | 4 +- .../test_modeling_wav2vec2_conformer.py | 4 +- tests/models/wavlm/test_modeling_wavlm.py | 4 +- .../test_feature_extraction_whisper.py | 4 +- .../whisper/test_modeling_flax_whisper.py | 4 +- .../whisper/test_modeling_tf_whisper.py | 2 +- tests/models/whisper/test_modeling_whisper.py | 26 ++--- .../test_pipelines_audio_classification.py | 4 +- ..._pipelines_automatic_speech_recognition.py | 100 +++++------------- tests/pipelines/test_pipelines_common.py | 4 +- tests/utils/test_audio_utils.py | 4 +- 56 files changed, 110 insertions(+), 254 deletions(-) diff --git a/src/transformers/commands/pt_to_tf.py b/src/transformers/commands/pt_to_tf.py index 4df45f7f086a51..4002b5e0eb85a4 100644 --- a/src/transformers/commands/pt_to_tf.py +++ b/src/transformers/commands/pt_to_tf.py @@ -202,9 +202,7 @@ def get_inputs(self, pt_model, tf_dummy_inputs, config): """ def _get_audio_input(): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", 
"clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") speech_samples = ds.sort("id").select(range(2))[:2]["audio"] raw_samples = [x["array"] for x in speech_samples] return raw_samples diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index c9a978f5ee8990..b226a059d106b1 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -1760,7 +1760,7 @@ class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor): >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt") >>> # Whisper has `begin_suppress_tokens` set by default (= `[220, 50256]`). 50256 is the EOS token, so this means @@ -1812,7 +1812,7 @@ class SuppressTokensLogitsProcessor(LogitsProcessor): >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt") >>> # Whisper has a long list of suppressed tokens. For instance, in this case, the token 1 is suppressed by default. @@ -1901,7 +1901,7 @@ class WhisperTimeStampLogitsProcessor(LogitsProcessor): >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[3]["audio"]["array"], return_tensors="pt") >>> input_features = inputs.input_features diff --git a/src/transformers/models/clvp/modeling_clvp.py b/src/transformers/models/clvp/modeling_clvp.py index 4124e380a3d73d..d53bed2a5d12fd 100644 --- a/src/transformers/models/clvp/modeling_clvp.py +++ b/src/transformers/models/clvp/modeling_clvp.py @@ -1681,7 +1681,7 @@ def get_speech_features( >>> # Define the Text and Load the Audio (We are taking an audio example from HuggingFace Hub using `datasets` library) >>> text = "This is an example text." - >>> ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050)) >>> _, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values() @@ -1754,7 +1754,7 @@ def forward( >>> # Define the Text and Load the Audio (We are taking an audio example from HuggingFace Hub using `datasets` library) >>> text = "This is an example text." 
- >>> ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050)) >>> _, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values() diff --git a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py index 4db60e0faeb4c1..8f1a8370933c91 100755 --- a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py @@ -831,7 +831,7 @@ def forward( >>> model.config.decoder_start_token_id = tokenizer.bos_token_id >>> # pre-process inputs and labels - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = feature_extractor( ... ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt" ... ) diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index fd0c271b668109..da79c2894877b4 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -1325,7 +1325,7 @@ def forward( ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1 diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 6c2a341927e200..2adfeea5b8b883 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -1471,7 +1471,7 @@ def call( ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1 @@ -1583,7 +1583,7 @@ def call( ... 
return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1 diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index a46a1d62af1163..c2f5dd0259093b 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -464,7 +464,7 @@ def forward( >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15") >>> model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-en-to-15") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values >>> # Inference: Translate English speech to German diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index 9832987f4e6433..8353a172b2120f 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -1129,7 +1129,7 @@ def forward( >>> model = Speech2TextModel.from_pretrained("facebook/s2t-small-librispeech-asr") >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/s2t-small-librispeech-asr") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = feature_extractor( ... ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt" ... ) @@ -1270,7 +1270,7 @@ def forward( >>> processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor( ... ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt" diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 6ad680d4fc0725..bac1256ca4b672 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -1483,7 +1483,7 @@ def call( ... 
return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> ds.set_format(type="tf") diff --git a/src/transformers/models/univnet/modeling_univnet.py b/src/transformers/models/univnet/modeling_univnet.py index 887493fdcf55f3..5b0c659c302a7c 100644 --- a/src/transformers/models/univnet/modeling_univnet.py +++ b/src/transformers/models/univnet/modeling_univnet.py @@ -525,7 +525,7 @@ def forward( >>> model = UnivNetModel.from_pretrained("dg845/univnet-dev") >>> feature_extractor = UnivNetFeatureExtractor.from_pretrained("dg845/univnet-dev") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> # Resample the audio to the feature extractor's sampling rate. >>> ds = ds.cast_column("audio", Audio(sampling_rate=feature_extractor.sampling_rate)) >>> inputs = feature_extractor( diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 7a629e24572af3..9a24b9d39fdaac 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -1076,7 +1076,7 @@ class FlaxWav2Vec2Model(FlaxWav2Vec2PreTrainedModel): ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor( @@ -1195,7 +1195,7 @@ class FlaxWav2Vec2ForCTC(FlaxWav2Vec2PreTrainedModel): ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor( @@ -1396,7 +1396,7 @@ def __call__( ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = feature_extractor(ds["speech"][0], return_tensors="np").input_values # Batch size 1 diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index cc8478d5b3c06a..a8338e363d94a2 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -1542,7 +1542,7 @@ def call( ... return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1 @@ -1654,7 +1654,7 @@ def call( ... 
return batch - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = ds.map(map_to_array) >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1 diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 16e50cc06c52ce..f1d021b58ee538 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -1938,7 +1938,7 @@ def forward( >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base") >>> model = Wav2Vec2ForPreTraining.from_pretrained("facebook/wav2vec2-base") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> input_values = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt").input_values # Batch size 1 >>> # compute masked indices diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py index 6f631e4683ad3a..c37dd980d4ed3b 100644 --- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py @@ -1453,7 +1453,7 @@ def forward( >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-conformer-rel-pos-large") >>> model = Wav2Vec2ConformerForPreTraining.from_pretrained("facebook/wav2vec2-conformer-rel-pos-large") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> input_values = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt").input_values # Batch size 1 >>> # compute masked indices diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py index 0467362ea2c7ec..4a28eb9203852c 100644 --- a/src/transformers/models/whisper/generation_whisper.py +++ b/src/transformers/models/whisper/generation_whisper.py @@ -464,7 +464,7 @@ def generate( >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt") >>> input_features = inputs.input_features diff --git a/src/transformers/models/whisper/modeling_flax_whisper.py b/src/transformers/models/whisper/modeling_flax_whisper.py index 9da592c107daef..cc4483963c6309 100644 --- a/src/transformers/models/whisper/modeling_flax_whisper.py +++ b/src/transformers/models/whisper/modeling_flax_whisper.py @@ -985,7 +985,7 @@ def encode( >>> processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en", from_pt=True) - >>> ds = 
load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="np") >>> input_features = inputs.input_features >>> encoder_outputs = model.encode(input_features=input_features) @@ -1045,7 +1045,7 @@ def decode( >>> processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en", from_pt=True) - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> input_features = processor(ds[0]["audio"]["array"], return_tensors="np").input_features >>> encoder_outputs = model.encode(input_features=input_features) @@ -1297,7 +1297,7 @@ def decode( >>> processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en", from_pt=True) - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="np") >>> input_features = inputs.input_features >>> encoder_outputs = model.encode(input_features=input_features) @@ -1516,7 +1516,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs): >>> processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en", from_pt=True) - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="np") >>> input_features = inputs.input_features >>> generated_ids = model.generate(input_ids=input_features) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 6f50141bff9f40..18f55dce8a2224 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1147,7 +1147,7 @@ def call( >>> model = TFWhisperModel.from_pretrained("openai/whisper-base") >>> feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="tf") >>> input_features = inputs.input_features >>> decoder_input_ids = tf.convert_to_tensor([[1, 1]]) * model.config.decoder_start_token_id @@ -1283,7 +1283,7 @@ def call( >>> model = TFWhisperModel.from_pretrained("openai/whisper-base") >>> feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", 
split="validation") >>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="tf") >>> input_features = inputs.input_features >>> decoder_input_ids = tf.convert_to_tensor([[1, 1]]) * model.config.decoder_start_token_id @@ -1413,7 +1413,7 @@ def call( >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = TFWhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="tf") >>> input_features = inputs.input_features diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index 7ba2af00ad81c0..6db7da4b957cdc 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -1555,7 +1555,7 @@ def forward( >>> model = WhisperModel.from_pretrained("openai/whisper-base") >>> feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt") >>> input_features = inputs.input_features >>> decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id @@ -1698,7 +1698,7 @@ def forward( >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en") >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt") >>> input_features = inputs.input_features @@ -1959,7 +1959,7 @@ def forward( >>> assistant_model = WhisperForCausalLM.from_pretrained("distil-whisper/distil-large-v2") - >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True) + >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> sample = ds[0]["audio"] >>> input_features = processor( ... 
sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt" diff --git a/tests/models/audio_spectrogram_transformer/test_feature_extraction_audio_spectrogram_transformer.py b/tests/models/audio_spectrogram_transformer/test_feature_extraction_audio_spectrogram_transformer.py index 967f1936215e97..fbe250908633db 100644 --- a/tests/models/audio_spectrogram_transformer/test_feature_extraction_audio_spectrogram_transformer.py +++ b/tests/models/audio_spectrogram_transformer/test_feature_extraction_audio_spectrogram_transformer.py @@ -153,9 +153,7 @@ def test_double_precision_pad(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/clap/test_feature_extraction_clap.py b/tests/models/clap/test_feature_extraction_clap.py index 8f2d6df3cb6c78..d0e913df828b84 100644 --- a/tests/models/clap/test_feature_extraction_clap.py +++ b/tests/models/clap/test_feature_extraction_clap.py @@ -164,9 +164,7 @@ def test_double_precision_pad(self): # Copied from tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest._load_datasamples def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/clap/test_modeling_clap.py b/tests/models/clap/test_modeling_clap.py index 8e3392133f1f26..9f8cc62d2e0fc3 100644 --- a/tests/models/clap/test_modeling_clap.py +++ b/tests/models/clap/test_modeling_clap.py @@ -665,9 +665,7 @@ def test_integration_unfused(self): "repeat": 0.0023, } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") audio_sample = librispeech_dummy[-1] model_id = "laion/clap-htsat-unfused" @@ -694,9 +692,7 @@ def test_integration_fused(self): "pad": -0.000379, } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") audio_sample = librispeech_dummy[-1] model_id = "laion/clap-htsat-fused" @@ -723,9 +719,7 @@ def test_batched_fused(self): "pad": 0.0006, } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]] model_id = "laion/clap-htsat-fused" @@ -752,9 +746,7 @@ def test_batched_unfused(self): "pad": 0.0019, } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = 
load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]] model_id = "laion/clap-htsat-unfused" diff --git a/tests/models/clvp/test_feature_extraction_clvp.py b/tests/models/clvp/test_feature_extraction_clvp.py index 83be97e8675434..db641eaf6145cb 100644 --- a/tests/models/clvp/test_feature_extraction_clvp.py +++ b/tests/models/clvp/test_feature_extraction_clvp.py @@ -209,9 +209,7 @@ def test_double_precision_pad(self): self.assertTrue(pt_processed.input_features.dtype == torch.float32) def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = ds.cast_column("audio", Audio(sampling_rate=22050)) # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/clvp/test_modeling_clvp.py b/tests/models/clvp/test_modeling_clvp.py index 5d17d3fed622cd..0cf89a74523364 100644 --- a/tests/models/clvp/test_modeling_clvp.py +++ b/tests/models/clvp/test_modeling_clvp.py @@ -371,9 +371,7 @@ def get_config(self): def prepare_config_and_inputs(self): _, input_ids, attention_mask = self.clvp_encoder_tester.prepare_config_and_inputs() - ds = datasets.load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050)) _, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values() @@ -555,9 +553,7 @@ def test_model_from_pretrained(self): class ClvpIntegrationTest(unittest.TestCase): def setUp(self): self.text = "This is an example text." 
- ds = datasets.load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050)) _, self.speech_samples, self.sr = ds.sort("id").select(range(1))[:1]["audio"][0].values() diff --git a/tests/models/data2vec/test_modeling_data2vec_audio.py b/tests/models/data2vec/test_modeling_data2vec_audio.py index 8bb16760ce61e9..d43128286853a5 100644 --- a/tests/models/data2vec/test_modeling_data2vec_audio.py +++ b/tests/models/data2vec/test_modeling_data2vec_audio.py @@ -694,9 +694,7 @@ def test_compute_mask_indices_short_audio(self): @slow class Data2VecAudioModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/encodec/test_feature_extraction_encodec.py b/tests/models/encodec/test_feature_extraction_encodec.py index 73c5019b11edda..e56517ac410661 100644 --- a/tests/models/encodec/test_feature_extraction_encodec.py +++ b/tests/models/encodec/test_feature_extraction_encodec.py @@ -138,9 +138,7 @@ def test_double_precision_pad(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech audio_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index 0a023894d8a00a..cff297be8e0002 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -461,9 +461,7 @@ def test_integration_24kHz(self): "1.5": [371955], "24.0": [6659962], } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") model_id = "facebook/encodec_24khz" model = EncodecModel.from_pretrained(model_id).to(torch_device) @@ -517,9 +515,7 @@ def test_integration_48kHz(self): "3.0": [144259, 146765, 156435, 176871, 161971], "24.0": [1568553, 1294948, 1306190, 1464747, 1663150], } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") model_id = "facebook/encodec_48khz" model = EncodecModel.from_pretrained(model_id).to(torch_device) @@ -581,9 +577,7 @@ def test_batch_48kHz(self): [85561, 81870, 76953, 48967, 79315, 85442, 81479, 107241], ], } - librispeech_dummy = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") 
model_id = "facebook/encodec_48khz" model = EncodecModel.from_pretrained(model_id).to(torch_device) diff --git a/tests/models/hubert/test_modeling_hubert.py b/tests/models/hubert/test_modeling_hubert.py index cd801be41d7b3d..86f2b4119324ae 100644 --- a/tests/models/hubert/test_modeling_hubert.py +++ b/tests/models/hubert/test_modeling_hubert.py @@ -753,9 +753,7 @@ class HubertModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/hubert/test_modeling_tf_hubert.py b/tests/models/hubert/test_modeling_tf_hubert.py index 35a8d98c233f77..3685e6598740c5 100644 --- a/tests/models/hubert/test_modeling_tf_hubert.py +++ b/tests/models/hubert/test_modeling_tf_hubert.py @@ -609,9 +609,7 @@ class TFHubertModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/pop2piano/test_feature_extraction_pop2piano.py b/tests/models/pop2piano/test_feature_extraction_pop2piano.py index 5a4652ad577cd7..c6766147975962 100644 --- a/tests/models/pop2piano/test_feature_extraction_pop2piano.py +++ b/tests/models/pop2piano/test_feature_extraction_pop2piano.py @@ -136,9 +136,7 @@ def test_call(self): self.assertTrue(input_features.extrapolated_beatstep.ndim == 2) def test_integration(self): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") speech_samples = ds.sort("id").select([0])["audio"] input_speech = [x["array"] for x in speech_samples][0] sampling_rate = [x["sampling_rate"] for x in speech_samples][0] diff --git a/tests/models/pop2piano/test_processor_pop2piano.py b/tests/models/pop2piano/test_processor_pop2piano.py index 634cdd26bd105c..06a8bacfd8a45c 100644 --- a/tests/models/pop2piano/test_processor_pop2piano.py +++ b/tests/models/pop2piano/test_processor_pop2piano.py @@ -111,9 +111,7 @@ def test_save_load_pretrained_additional_features(self): def get_inputs(self): """get inputs for both feature extractor and tokenizer""" - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") speech_samples = ds.sort("id").select([0])["audio"] input_speech = [x["array"] for x in speech_samples][0] sampling_rate = [x["sampling_rate"] for x in speech_samples][0] diff --git a/tests/models/seamless_m4t/test_feature_extraction_seamless_m4t.py b/tests/models/seamless_m4t/test_feature_extraction_seamless_m4t.py index d9919e0adea6cc..a8fca4b90ba941 100644 --- 
a/tests/models/seamless_m4t/test_feature_extraction_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_feature_extraction_seamless_m4t.py @@ -258,9 +258,7 @@ def test_double_precision_pad(self): self.assertTrue(pt_processed.input_features.dtype == torch.float32) def _load_datasample(self, id): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_sample = ds.sort("id")[id]["audio"]["array"] diff --git a/tests/models/sew/test_modeling_sew.py b/tests/models/sew/test_modeling_sew.py index fe10d994450be7..6b21c2e9f7128e 100644 --- a/tests/models/sew/test_modeling_sew.py +++ b/tests/models/sew/test_modeling_sew.py @@ -494,9 +494,7 @@ class SEWModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/sew_d/test_modeling_sew_d.py b/tests/models/sew_d/test_modeling_sew_d.py index 9fd94fbfef2668..b2efdccdf07c3f 100644 --- a/tests/models/sew_d/test_modeling_sew_d.py +++ b/tests/models/sew_d/test_modeling_sew_d.py @@ -508,9 +508,7 @@ class SEWDModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/speech_to_text/test_feature_extraction_speech_to_text.py b/tests/models/speech_to_text/test_feature_extraction_speech_to_text.py index 6c8861e3d8689f..9023e8467f736c 100644 --- a/tests/models/speech_to_text/test_feature_extraction_speech_to_text.py +++ b/tests/models/speech_to_text/test_feature_extraction_speech_to_text.py @@ -259,9 +259,7 @@ def test_double_precision_pad(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 44672f1c588f31..4f19cc01b33c70 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -793,9 +793,7 @@ def default_processor(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", 
split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py index d12174533395df..c2fd215f388575 100644 --- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py @@ -587,9 +587,7 @@ def default_processor(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/speecht5/test_feature_extraction_speecht5.py b/tests/models/speecht5/test_feature_extraction_speecht5.py index f8f7f53cac200c..5ec632e7e76c63 100644 --- a/tests/models/speecht5/test_feature_extraction_speecht5.py +++ b/tests/models/speecht5/test_feature_extraction_speecht5.py @@ -380,9 +380,7 @@ def test_attention_mask_with_truncation_target(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/speecht5/test_modeling_speecht5.py b/tests/models/speecht5/test_modeling_speecht5.py index 1d67bb4f8ab527..7a8aab83272bc8 100644 --- a/tests/models/speecht5/test_modeling_speecht5.py +++ b/tests/models/speecht5/test_modeling_speecht5.py @@ -744,9 +744,7 @@ def default_processor(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] @@ -1771,9 +1769,7 @@ def default_processor(self): def _load_datasamples(self, num_samples): from datasets import load_dataset - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/unispeech/test_modeling_unispeech.py b/tests/models/unispeech/test_modeling_unispeech.py index 1804e2c95ef4ce..d0a1d352243b19 100644 --- a/tests/models/unispeech/test_modeling_unispeech.py +++ b/tests/models/unispeech/test_modeling_unispeech.py @@ -549,9 +549,7 @@ def test_model_from_pretrained(self): @slow class UniSpeechModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") 
# automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py index f3d467f0795d11..1aa2da20d5ec85 100644 --- a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py +++ b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py @@ -806,9 +806,7 @@ def test_model_from_pretrained(self): @slow class UniSpeechSatModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/univnet/test_feature_extraction_univnet.py b/tests/models/univnet/test_feature_extraction_univnet.py index 673faaae9adaca..dfa335d15383ee 100644 --- a/tests/models/univnet/test_feature_extraction_univnet.py +++ b/tests/models/univnet/test_feature_extraction_univnet.py @@ -327,9 +327,7 @@ def test_double_precision_pad(self): self.assertTrue(pt_processed.input_features.dtype == torch.float32) def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = ds.cast_column("audio", Audio(sampling_rate=self.feat_extract_tester.sampling_rate)) # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py index 4dc28b3c168b1d..e160c799b786cb 100644 --- a/tests/models/univnet/test_modeling_univnet.py +++ b/tests/models/univnet/test_modeling_univnet.py @@ -216,9 +216,7 @@ def tearDown(self): torch.cuda.empty_cache() def _load_datasamples(self, num_samples, sampling_rate=24000): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = ds.cast_column("audio", Audio(sampling_rate=sampling_rate)) # automatic decoding with librispeech speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"] diff --git a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py index 18252a17524337..b91d66654de6ae 100644 --- a/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_flax_wav2vec2.py @@ -489,9 +489,7 @@ def test_sample_negatives_with_attn_mask(self): @slow class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py 
b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py index 2f10e3378d736a..7ef97290e61c9f 100644 --- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py @@ -716,9 +716,7 @@ def tearDown(self): gc.collect() def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 51d105a5ee3f80..ff7a85218d3a00 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -1464,9 +1464,7 @@ def tearDown(self): backend_empty_cache(torch_device) def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter( lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)] diff --git a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py index 0fbd000edc8c3d..80237fea9d1e43 100644 --- a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py +++ b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py @@ -855,9 +855,7 @@ def test_sample_negatives_with_mask(self): @slow class Wav2Vec2BertModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]) speech_samples = speech_samples[:num_samples]["audio"] diff --git a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py index ae13a8ecba9dea..096d1368ed02cb 100644 --- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py +++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py @@ -863,9 +863,7 @@ def test_sample_negatives_with_mask(self): @slow class Wav2Vec2ConformerModelIntegrationTest(unittest.TestCase): def _load_datasamples(self, num_samples): - ds = load_dataset( - "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True - ) + ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") # automatic decoding with librispeech speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]) speech_samples = speech_samples[:num_samples]["audio"] diff --git a/tests/models/wavlm/test_modeling_wavlm.py b/tests/models/wavlm/test_modeling_wavlm.py index 8f4d1e850e0056..b20792d83545d8 100644 --- a/tests/models/wavlm/test_modeling_wavlm.py +++ b/tests/models/wavlm/test_modeling_wavlm.py @@ -491,9 +491,7 @@ def 
test_model_from_pretrained(self):
 @slow
 class WavLMModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
diff --git a/tests/models/whisper/test_feature_extraction_whisper.py b/tests/models/whisper/test_feature_extraction_whisper.py
index 579c42519ae033..a8295542f4e377 100644
--- a/tests/models/whisper/test_feature_extraction_whisper.py
+++ b/tests/models/whisper/test_feature_extraction_whisper.py
@@ -215,9 +215,7 @@ def test_double_precision_pad(self):
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)
 
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
 
diff --git a/tests/models/whisper/test_modeling_flax_whisper.py b/tests/models/whisper/test_modeling_flax_whisper.py
index d5e18d22c2f3c8..4b8092e800ad69 100644
--- a/tests/models/whisper/test_modeling_flax_whisper.py
+++ b/tests/models/whisper/test_modeling_flax_whisper.py
@@ -410,9 +410,7 @@ def default_processor(self):
         return WhisperProcessor.from_pretrained("openai/whisper-base")
 
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
 
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index 97143cc4df5120..b200671e048f0e 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -704,7 +704,7 @@ def test_generate_with_prompt_ids_and_forced_decoder_ids(self):
 
 
 def _load_datasamples(num_samples):
-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
+    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
     # automatic decoding with librispeech
     speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
 
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index a11097fe7dc391..5a59f7a72517a4 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -1835,9 +1835,7 @@ def default_processor(self):
         return WhisperProcessor.from_pretrained("openai/whisper-base")
 
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
 
@@ -2718,9 +2716,7 @@ def test_speculative_decoding_distil(self):
         )
         assistant_model.to(torch_device)
 
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]
 
         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2769,9 +2765,7 @@ def test_speculative_decoding_non_distil(self):
         )
         assistant_model.to(torch_device)
 
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]
 
         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2812,7 +2806,7 @@ def test_whisper_longform_single_batch(self):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
 
         input_features = processor(
@@ -2848,9 +2842,7 @@ def test_whisper_longform_prompt_ids(self):
         prompt = "Mr. Kilter, Brionno."  # let's force Quilter -> Kilter, Brion -> Brionno
         prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
         one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
 
         first_text = ds[0]["text"].lower()
@@ -2901,7 +2893,7 @@ def test_whisper_longform_single_batch_prev_cond(self):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
 
         input_features = processor(
@@ -2983,7 +2975,7 @@ def test_whisper_longform_single_batch_beam(self):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
 
         input_features = processor(
@@ -3025,7 +3017,7 @@ def test_whisper_longform_multi_batch(self):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -3079,7 +3071,7 @@ def test_whisper_longform_multi_batch_prev_cond(self):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
         model = model.to(torch_device)
 
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py
index a8c5deb2284452..1f403a8be05d2a 100644
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -71,9 +71,7 @@ def run_torchaudio(self, audio_classifier):
         import datasets
 
         # test with a local file
-        dataset = datasets.load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         audio = dataset[0]["audio"]["array"]
         output = audio_classifier(audio)
         self.assertEqual(
diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
index 82c5580f0ea2cc..d8810f67eec15e 100644
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -294,9 +294,7 @@ def test_torch_large(self):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -313,9 +311,7 @@ def test_torch_large_with_input_features(self):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@@ -545,9 +541,7 @@ def test_torch_whisper(self):
             model="openai/whisper-tiny",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -722,9 +716,7 @@ def test_find_longest_common_subsequence(self):
     @slow
     @require_torch
     def test_whisper_timestamp_prediction(self):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -822,9 +814,7 @@
     @slow
     @require_torch
     def test_whisper_large_timestamp_prediction(self):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -918,9 +908,7 @@ def test_whisper_word_timestamps_batched(self):
             chunk_length_s=3,
             return_timestamps="word",
         )
-        data = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = data[0]["audio"]
 
         # not the same output as test_simple_whisper_asr because of chunking
@@ -963,9 +951,7 @@ def test_whisper_large_word_timestamps_batched(self):
             model="openai/whisper-large-v3",
             return_timestamps="word",
        )
-        data = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = data[0]["audio"]
 
         # not the same output as test_simple_whisper_asr because of chunking
@@ -1010,9 +996,7 @@ def test_torch_speech_encoder_decoder(self):
             framework="pt",
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
@@ -1030,9 +1014,7 @@ def test_simple_wav2vec2(self):
         output = asr(waveform)
         self.assertEqual(output, {"text": ""})
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -1058,9 +1040,7 @@ def test_simple_s2t(self):
         output = asr(waveform)
         self.assertEqual(output, {"text": "(Applausi)"})
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
@@ -1080,9 +1060,7 @@ def test_simple_whisper_asr(self):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         filename = ds[0]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(
@@ -1151,9 +1129,7 @@ def test_simple_whisper_translation(self):
             model="openai/whisper-large",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -1188,9 +1164,7 @@ def test_whisper_language(self):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         filename = ds[0]["file"]
 
         # 1. English-only model compatible with no language argument
@@ -1323,9 +1297,7 @@ def test_xls_r_to_en(self):
             framework="pt",
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A man said to the universe: “Sir, I exist."})
@@ -1341,9 +1313,7 @@ def test_xls_r_from_en(self):
             framework="pt",
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "Ein Mann sagte zu dem Universum, Sir, ich bin da."})
@@ -1360,9 +1330,7 @@ def test_speech_to_text_leveraged(self):
             framework="pt",
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
 
         output = speech_recognizer(filename)
@@ -1379,9 +1347,7 @@ def test_wav2vec2_conformer_float16(self):
             framework="pt",
         )
 
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]
 
         output = speech_recognizer(sample)
@@ -1398,9 +1364,7 @@ def test_chunking_fast(self):
             chunk_length_s=10.0,
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 2
@@ -1416,9 +1380,7 @@ def test_return_timestamps_ctc_fast(self):
             model="hf-internal-testing/tiny-random-wav2vec2",
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         # Take short audio to keep the test readable
         audio = ds[40]["audio"]["array"][:800]
 
@@ -1462,9 +1424,7 @@ def test_chunking_fast_with_lm(self):
             chunk_length_s=10.0,
         )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 2
@@ -1492,9 +1452,7 @@ def test_with_lm_fast(self):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 2
@@ -1522,9 +1480,7 @@ def test_with_local_lm_fast(self):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 2
@@ -1608,9 +1564,7 @@ def test_seamless_v2(self):
             device=torch_device,
         )
 
-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]
 
         result = pipe(sample, generate_kwargs={"tgt_lang": "eng"})
@@ -1633,9 +1587,7 @@ def test_chunking_and_timestamps(self):
             chunk_length_s=10.0,
        )
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 10
@@ -1747,9 +1699,7 @@ def test_chunking_with_lm(self):
             model="patrickvonplaten/wav2vec2-base-100h-with-lm",
             chunk_length_s=10.0,
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         n_repeats = 10
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index d4dbff218558e1..95349a8335de6d 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -840,9 +840,7 @@ def test_cached_pipeline_has_minimum_calls_to_head(self):
     def test_chunk_pipeline_batching_single_file(self):
         # Make sure we have cached the pipeline.
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
-        ds = datasets.load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
 
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
diff --git a/tests/utils/test_audio_utils.py b/tests/utils/test_audio_utils.py
index 47c384870d4a04..3e417bf7e3b450 100644
--- a/tests/utils/test_audio_utils.py
+++ b/tests/utils/test_audio_utils.py
@@ -262,9 +262,7 @@ def test_window_function(self):
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset
 
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
         return [x["array"] for x in speech_samples]