From 68be1d3c1698eede18a4dc62e5c81814546a766b Mon Sep 17 00:00:00 2001
From: Yoach Lacombe <52246514+ylacombe@users.noreply.github.com>
Date: Thu, 18 Apr 2024 17:18:01 +0200
Subject: [PATCH] fix Parameter dtype in audio models (#30310)

---
 src/transformers/models/data2vec/modeling_data2vec_audio.py | 6 +++---
 src/transformers/models/hubert/modeling_hubert.py | 6 +++---
 src/transformers/models/sew/modeling_sew.py | 6 +++---
 src/transformers/models/sew_d/modeling_sew_d.py | 6 +++---
 src/transformers/models/speecht5/modeling_speecht5.py | 6 +++---
 src/transformers/models/unispeech/modeling_unispeech.py | 6 +++---
 .../models/unispeech_sat/modeling_unispeech_sat.py | 6 +++---
 src/transformers/models/wav2vec2/modeling_wav2vec2.py | 6 +++---
 .../models/wav2vec2_bert/modeling_wav2vec2_bert.py | 6 +++---
 .../wav2vec2_conformer/modeling_wav2vec2_conformer.py | 6 +++---
 src/transformers/models/wavlm/modeling_wavlm.py | 6 +++---
 11 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py
index b5300cca084fa6..6df96aa49bb267 100755
--- a/src/transformers/models/data2vec/modeling_data2vec_audio.py
+++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py
@@ -822,7 +822,7 @@ def __init__(self, config: Data2VecAudioConfig):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Data2VecAudioEncoder(config)
 
@@ -858,7 +858,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -868,7 +868,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py
index f9e223f9a384d0..d17119426a55f8 100755
--- a/src/transformers/models/hubert/modeling_hubert.py
+++ b/src/transformers/models/hubert/modeling_hubert.py
@@ -974,7 +974,7 @@ def __init__(self, config: HubertConfig):
         self.feature_projection = HubertFeatureProjection(config)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = HubertEncoderStableLayerNorm(config)
@@ -1005,7 +1005,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1015,7 +1015,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py
index 950a91fb6a54b1..67e8dbdcbd0610 100644
--- a/src/transformers/models/sew/modeling_sew.py
+++ b/src/transformers/models/sew/modeling_sew.py
@@ -834,7 +834,7 @@ def __init__(self, config: SEWConfig):
         self.feature_dropout = nn.Dropout(config.feat_proj_dropout)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = SEWEncoder(config)
 
@@ -862,7 +862,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -872,7 +872,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py
index aadcf6f6693c5b..07da31afab5628 100644
--- a/src/transformers/models/sew_d/modeling_sew_d.py
+++ b/src/transformers/models/sew_d/modeling_sew_d.py
@@ -1360,7 +1360,7 @@ def __init__(self, config: SEWDConfig):
         self.feature_dropout = nn.Dropout(config.feat_proj_dropout)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = SEWDEncoder(config)
 
@@ -1388,7 +1388,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1398,7 +1398,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/speecht5/modeling_speecht5.py b/src/transformers/models/speecht5/modeling_speecht5.py
index 071b987dbb5a47..e2b38d019296be 100644
--- a/src/transformers/models/speecht5/modeling_speecht5.py
+++ b/src/transformers/models/speecht5/modeling_speecht5.py
@@ -517,7 +517,7 @@ def __init__(self, config):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.pos_conv_embed = SpeechT5PositionalConvEmbedding(config)
         self.pos_sinusoidal_embed = SpeechT5SinusoidalPositionalEmbedding(
@@ -616,7 +616,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -626,7 +626,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py
index 473bc7d4ff12e4..47dae1f3a8fbfa 100755
--- a/src/transformers/models/unispeech/modeling_unispeech.py
+++ b/src/transformers/models/unispeech/modeling_unispeech.py
@@ -1090,7 +1090,7 @@ def __init__(self, config: UniSpeechConfig):
         self.feature_projection = UniSpeechFeatureProjection(config)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = UniSpeechEncoderStableLayerNorm(config)
@@ -1121,7 +1121,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1131,7 +1131,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
index f38da0d47f5c3d..2a882874fdb6f2 100755
--- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
+++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
@@ -1108,7 +1108,7 @@ def __init__(self, config: UniSpeechSatConfig):
         self.feature_extractor = UniSpeechSatFeatureEncoder(config)
         self.feature_projection = UniSpeechSatFeatureProjection(config)
 
-        self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+        self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = UniSpeechSatEncoderStableLayerNorm(config)
@@ -1139,7 +1139,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1149,7 +1149,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
index d40af1739c25db..0773af3a5618d9 100755
--- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -1445,7 +1445,7 @@ def __init__(self, config: Wav2Vec2Config):
 
         # model only needs masking vector if mask prob is > 0.0
        if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = Wav2Vec2EncoderStableLayerNorm(config)
@@ -1496,7 +1496,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1506,7 +1506,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py
index 6519faa931d688..21c76048aaf70b 100644
--- a/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py
+++ b/src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py
@@ -1053,7 +1053,7 @@ def __init__(self, config: Wav2Vec2BertConfig):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Wav2Vec2BertEncoder(config)
 
@@ -1087,7 +1087,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1097,7 +1097,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
index 8354a88a517fa9..2c4f5c289af062 100644
--- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
+++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
@@ -1235,7 +1235,7 @@ def __init__(self, config: Wav2Vec2ConformerConfig):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Wav2Vec2ConformerEncoder(config)
 
@@ -1273,7 +1273,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1283,7 +1283,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py
index f46fc1ef4f01da..94833e86a1007c 100755
--- a/src/transformers/models/wavlm/modeling_wavlm.py
+++ b/src/transformers/models/wavlm/modeling_wavlm.py
@@ -1107,7 +1107,7 @@ def __init__(self, config: WavLMConfig):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = WavLMEncoderStableLayerNorm(config)
@@ -1158,7 +1158,7 @@ def _mask_hidden_states(
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1168,7 +1168,7 @@ def _mask_hidden_states(
                 min_masks=self.config.mask_time_min_masks,
            )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
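
For context on the change above: torch.FloatTensor(...) always allocates float32, while torch.Tensor(...) follows the current default floating-point dtype (for instance the one set while a model is instantiated with a specific torch_dtype), so the masking parameter ends up in the same dtype as the rest of the model and the explicit .to(hidden_states.dtype) casts become unnecessary. A minimal standalone sketch of that behaviour, not part of the patch and assuming a recent PyTorch where torch.set_default_dtype accepts half precision:

    import torch
    from torch import nn

    prev = torch.get_default_dtype()
    torch.set_default_dtype(torch.float16)  # roughly what happens while instantiating with torch_dtype=torch.float16

    old_style = nn.Parameter(torch.FloatTensor(4).uniform_())  # stays torch.float32 regardless of the default dtype
    new_style = nn.Parameter(torch.Tensor(4))                   # picks up the default dtype -> torch.float16

    print(old_style.dtype, new_style.dtype)

    torch.set_default_dtype(prev)  # restore the default dtype

Because the parameter created via torch.Tensor already matches hidden_states.dtype at runtime, the masking assignment in _mask_hidden_states no longer needs a per-call cast.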