Skip to content

Commit

Permalink
fix Parameter dtype in audio models (huggingface#30310)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ylacombe authored Apr 18, 2024
1 parent 7913214 commit 68be1d3
Show file tree
Hide file tree
Showing 11 changed files with 33 additions and 33 deletions.
6 changes: 3 additions & 3 deletions src/transformers/models/data2vec/modeling_data2vec_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ def __init__(self, config: Data2VecAudioConfig):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.encoder = Data2VecAudioEncoder(config)

Expand Down Expand Up @@ -858,7 +858,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -868,7 +868,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/hubert/modeling_hubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,7 @@ def __init__(self, config: HubertConfig):
self.feature_projection = HubertFeatureProjection(config)

if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

if config.do_stable_layer_norm:
self.encoder = HubertEncoderStableLayerNorm(config)
Expand Down Expand Up @@ -1005,7 +1005,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1015,7 +1015,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/sew/modeling_sew.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ def __init__(self, config: SEWConfig):
self.feature_dropout = nn.Dropout(config.feat_proj_dropout)

if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.encoder = SEWEncoder(config)

Expand Down Expand Up @@ -862,7 +862,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -872,7 +872,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/sew_d/modeling_sew_d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,7 +1360,7 @@ def __init__(self, config: SEWDConfig):
self.feature_dropout = nn.Dropout(config.feat_proj_dropout)

if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.encoder = SEWDEncoder(config)

Expand Down Expand Up @@ -1388,7 +1388,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1398,7 +1398,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/speecht5/modeling_speecht5.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ def __init__(self, config):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.pos_conv_embed = SpeechT5PositionalConvEmbedding(config)
self.pos_sinusoidal_embed = SpeechT5SinusoidalPositionalEmbedding(
Expand Down Expand Up @@ -616,7 +616,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -626,7 +626,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/unispeech/modeling_unispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,7 +1090,7 @@ def __init__(self, config: UniSpeechConfig):
self.feature_projection = UniSpeechFeatureProjection(config)

if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

if config.do_stable_layer_norm:
self.encoder = UniSpeechEncoderStableLayerNorm(config)
Expand Down Expand Up @@ -1121,7 +1121,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1131,7 +1131,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
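6 changes: 3 additions & 3 deletions src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
Original file line number Diff line number Diff line change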
Expand Up @@ -1108,7 +1108,7 @@ def __init__(self, config: UniSpeechSatConfig):
self.feature_extractor = UniSpeechSatFeatureEncoder(config)
self.feature_projection = UniSpeechSatFeatureProjection(config)

self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

if config.do_stable_layer_norm:
self.encoder = UniSpeechSatEncoderStableLayerNorm(config)
Expand Down Expand Up @@ -1139,7 +1139,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1149,7 +1149,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/wav2vec2/modeling_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1445,7 +1445,7 @@ def __init__(self, config: Wav2Vec2Config):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

if config.do_stable_layer_norm:
self.encoder = Wav2Vec2EncoderStableLayerNorm(config)
Expand Down Expand Up @@ -1496,7 +1496,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1506,7 +1506,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
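6 changes: 3 additions & 3 deletions src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py
Original file line number Diff line number Diff line change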
Expand Up @@ -1053,7 +1053,7 @@ def __init__(self, config: Wav2Vec2BertConfig):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.encoder = Wav2Vec2BertEncoder(config)

Expand Down Expand Up @@ -1087,7 +1087,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1097,7 +1097,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
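6 changes: 3 additions & 3 deletions src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
Original file line number Diff line number Diff line change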
Expand Up @@ -1235,7 +1235,7 @@ def __init__(self, config: Wav2Vec2ConformerConfig):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

self.encoder = Wav2Vec2ConformerEncoder(config)

Expand Down Expand Up @@ -1273,7 +1273,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1283,7 +1283,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/wavlm/modeling_wavlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ def __init__(self, config: WavLMConfig):

# model only needs masking vector if mask prob is > 0.0
if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())

if config.do_stable_layer_norm:
self.encoder = WavLMEncoderStableLayerNorm(config)
Expand Down Expand Up @@ -1158,7 +1158,7 @@ def _mask_hidden_states(

if mask_time_indices is not None:
# apply SpecAugment along time axis with given mask_time_indices
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed
elif self.config.mask_time_prob > 0 and self.training:
mask_time_indices = _compute_mask_indices(
(batch_size, sequence_length),
Expand All @@ -1168,7 +1168,7 @@ def _mask_hidden_states(
min_masks=self.config.mask_time_min_masks,
)
mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
hidden_states[mask_time_indices] = self.masked_spec_embed

if self.config.mask_feature_prob > 0 and self.training:
# generate indices & apply SpecAugment along feature axis
Expand Down

0 comments on commit 68be1d3

Please sign in to comment.