Skip to content

Commit

Permalink
Early labels validation (#31240)
Browse files: browse the repository at this point in the history
* Move label validation checks - fail early

* Remove some formatting changes - add back the labels change for wav2vec2
  • Loading branch information
amyeroberts authored Jun 5, 2024
1 parent 03ea160 commit 5465904
Show file tree
Hide file tree
Showing 34 changed files with 165 additions and 188 deletions.
20 changes: 10 additions & 10 deletions src/transformers/models/bark/modeling_bark.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,12 @@ def forward(
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

loss = None
if labels is not None:
raise NotImplementedError(
"Training is not implemented yet for Bark - ensure you do not pass `labels` to the model."
)

# Verify if input_embeds already exists
# then compute embeddings.
if input_ids is not None and input_embeds is not None:
Expand Down Expand Up @@ -870,12 +876,6 @@ def forward(

logits = self.lm_head(hidden_states)

loss = None
if labels is not None:
raise NotImplementedError(
"Training is not implemented yet for Bark - ensure you do not pass `labels` to the model."
)

if not return_dict:
return tuple(
v for v in [None, logits, present_key_values, all_hidden_states, all_self_attentions] if v is not None
Expand Down Expand Up @@ -1393,6 +1393,10 @@ def forward(
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

if codebook_idx == 0:
raise ValueError("Cannot predict 0th codebook - 0th codebook should be predicted by the coarse model")

Expand Down Expand Up @@ -1470,10 +1474,6 @@ def forward(

logits = self.lm_heads[codebook_idx - self.config.n_codes_given](hidden_states)

loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

if not return_dict:
return tuple(v for v in [None, logits, all_hidden_states, all_self_attentions] if v is not None)

Expand Down
8 changes: 4 additions & 4 deletions src/transformers/models/beit/modeling_beit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,9 @@ def forward(
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.beit(
pixel_values,
head_mask=head_mask,
Expand Down Expand Up @@ -1279,10 +1282,7 @@ def forward(

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
loss = self.compute_loss(logits, auxiliary_logits, labels)
loss = self.compute_loss(logits, auxiliary_logits, labels)

if not return_dict:
if output_hidden_states:
Expand Down
7 changes: 3 additions & 4 deletions src/transformers/models/data2vec/modeling_data2vec_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,9 +1372,11 @@ def forward(
All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
config.vocab_size - 1]`.
"""

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None and labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

outputs = self.data2vec_audio(
input_values,
attention_mask=attention_mask,
Expand All @@ -1390,9 +1392,6 @@ def forward(

loss = None
if labels is not None:
if labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

# retrieve loss input_lengths from attention_mask
attention_mask = (
attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
Expand Down
8 changes: 4 additions & 4 deletions src/transformers/models/data2vec/modeling_data2vec_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,9 @@ def forward(
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.data2vec_vision(
pixel_values,
head_mask=head_mask,
Expand Down Expand Up @@ -1205,10 +1208,7 @@ def forward(

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
loss = self.compute_loss(logits, auxiliary_logits, labels)
loss = self.compute_loss(logits, auxiliary_logits, labels)

if not return_dict:
if output_hidden_states:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,9 @@ def call(
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.data2vec_vision(
pixel_values,
head_mask=head_mask,
Expand Down Expand Up @@ -1672,10 +1675,7 @@ def reshape_features(x):

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
loss = self.compute_loss(logits, auxiliary_logits, labels)
loss = self.compute_loss(logits, auxiliary_logits, labels)

if not return_dict:
if output_hidden_states:
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/deprecated/mctct/modeling_mctct.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,8 @@ def forward(
All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
config.vocab_size - 1]`.
"""
if labels is not None and labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
outputs = self.mctct(
Expand All @@ -749,9 +751,6 @@ def forward(

loss = None
if labels is not None:
if labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

# retrieve loss input_lengths from attention_mask
attention_mask = (
attention_mask
Expand Down
11 changes: 5 additions & 6 deletions src/transformers/models/deprecated/realm/modeling_realm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1440,9 +1440,13 @@ def forward(
>>> outputs = model(**inputs)
>>> logits = outputs.logits
```"""

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None and relevance_score is None:
raise ValueError(
"You have to specify `relevance_score` when `labels` is specified in order to compute loss."
)

(flattened_input_ids, flattened_attention_mask, flattened_token_type_ids) = self._flatten_inputs(
input_ids, attention_mask, token_type_ids
)
Expand All @@ -1468,11 +1472,6 @@ def forward(

masked_lm_loss = None
if labels is not None:
if candidate_score is None:
raise ValueError(
"You have to specify `relevance_score` when `labels` is specified in order to compute loss."
)

batch_size, seq_length = labels.size()

if mlm_mask is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,10 @@ def forward(
>>> formatted = (output * 255 / np.max(output)).astype("uint8")
>>> depth = Image.fromarray(formatted)
```"""
loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
Expand All @@ -444,10 +448,6 @@ def forward(

predicted_depth = self.head(hidden_states, patch_height, patch_width)

loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

if not return_dict:
if output_hidden_states:
output = (predicted_depth,) + outputs[1:]
Expand Down
38 changes: 19 additions & 19 deletions src/transformers/models/dpt/modeling_dpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,10 @@ def forward(
>>> formatted = (output * 255 / np.max(output)).astype("uint8")
>>> depth = Image.fromarray(formatted)
```"""
loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
Expand Down Expand Up @@ -1183,10 +1187,6 @@ def forward(

predicted_depth = self.head(hidden_states)

loss = None
if labels is not None:
raise NotImplementedError("Training is not implemented yet")

if not return_dict:
if output_hidden_states:
output = (predicted_depth,) + outputs[1:]
Expand Down Expand Up @@ -1308,6 +1308,9 @@ def forward(
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.dpt(
pixel_values,
head_mask=head_mask,
Expand Down Expand Up @@ -1342,22 +1345,19 @@ def forward(

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
if auxiliary_logits is not None:
upsampled_auxiliary_logits = nn.functional.interpolate(
auxiliary_logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
if auxiliary_logits is not None:
upsampled_auxiliary_logits = nn.functional.interpolate(
auxiliary_logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
# compute weighted loss
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
main_loss = loss_fct(upsampled_logits, labels)
auxiliary_loss = loss_fct(upsampled_auxiliary_logits, labels)
loss = main_loss + self.config.auxiliary_loss_weight * auxiliary_loss
# compute weighted loss
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
main_loss = loss_fct(upsampled_logits, labels)
auxiliary_loss = loss_fct(upsampled_auxiliary_logits, labels)
loss = main_loss + self.config.auxiliary_loss_weight * auxiliary_loss

if not return_dict:
if output_hidden_states:
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/gptj/modeling_tf_gptj.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,8 @@ def call(
config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
"""
if labels is not None and self.config.pad_token_id is None and input_ids.shape[0] != 1:
raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")

transformer_outputs = self.transformer(
input_ids=input_ids,
Expand Down Expand Up @@ -963,9 +965,6 @@ def call(
loss = None

if labels is not None:
if self.config.pad_token_id is None and logits_shape[0] != 1:
raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")

if not tf.is_tensor(sequence_lengths):
in_logits = logits[0 : logits_shape[0], sequence_lengths]

Expand Down
7 changes: 3 additions & 4 deletions src/transformers/models/hubert/modeling_hubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1574,9 +1574,11 @@ def forward(
All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
config.vocab_size - 1]`.
"""

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None and labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

outputs = self.hubert(
input_values,
attention_mask=attention_mask,
Expand All @@ -1592,9 +1594,6 @@ def forward(

loss = None
if labels is not None:
if labels.max() >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

# retrieve loss input_lengths from attention_mask
attention_mask = (
attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/hubert/modeling_tf_hubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1600,6 +1600,8 @@ def call(
>>> loss = model(input_values, labels=labels).loss
```"""
if labels is not None and tf.reduce_max(labels) >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

outputs = self.hubert(
input_values=input_values,
Expand All @@ -1619,9 +1621,6 @@ def call(
logits = self.lm_head(hidden_states)

if labels is not None:
if tf.reduce_max(labels) >= self.config.vocab_size:
raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

attention_mask = (
attention_mask if attention_mask is not None else tf.ones_like(input_values, dtype=tf.float32)
)
Expand Down
18 changes: 9 additions & 9 deletions src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,9 @@ def forward(
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.mobilenet_v2(
pixel_values,
output_hidden_states=True, # we need the intermediate hidden states
Expand All @@ -834,15 +837,12 @@ def forward(

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
loss = loss_fct(upsampled_logits, labels)
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
loss = loss_fct(upsampled_logits, labels)

if not return_dict:
if output_hidden_states:
Expand Down
18 changes: 9 additions & 9 deletions src/transformers/models/mobilevit/modeling_mobilevit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,9 @@ def forward(
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if labels is not None and self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")

outputs = self.mobilevit(
pixel_values,
output_hidden_states=True, # we need the intermediate hidden states
Expand All @@ -1038,15 +1041,12 @@ def forward(

loss = None
if labels is not None:
if self.config.num_labels == 1:
raise ValueError("The number of labels should be greater than one")
else:
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
loss = loss_fct(upsampled_logits, labels)
# upsample logits to the images' original size
upsampled_logits = nn.functional.interpolate(
logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
)
loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
loss = loss_fct(upsampled_logits, labels)

if not return_dict:
if output_hidden_states:
Expand Down
Loading

0 comments on commit 5465904

Please sign in to comment.