Merge branch 'huggingface:main' into fix_num_assistant_tokens
jmamou authored Feb 15, 2024
2 parents 69ed88e + f3aa7db commit 3993ed3
Showing 10 changed files with 44 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/transformers/generation/utils.py
@@ -135,7 +135,7 @@ class GenerateDecoderOnlyOutput(ModelOutput):
 @dataclass
 class GenerateEncoderDecoderOutput(ModelOutput):
     """
-    Outputs of encoder-decider generation models, when using non-beam methods.
+    Outputs of encoder-decoder generation models, when using non-beam methods.
 
     Args:
         sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
2 changes: 1 addition & 1 deletion src/transformers/modeling_utils.py
@@ -4192,7 +4192,7 @@ def warn_if_padding_and_no_attention_mask(self, input_ids, attention_mask):
 
     @property
     def _is_quantized_training_enabled(self):
-        logger.warning(
+        warnings.warn(
             "`_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead",
             FutureWarning,
         )
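The replacement named in the warning is the quantizer's own flag; a minimal migration sketch for downstream code (assuming `model` is a `PreTrainedModel`, with `getattr` used defensively since unquantized models carry no `hf_quantizer`):

# Sketch, not the library's code: read trainability from the quantizer
# instead of the deprecated private property.
quantizer = getattr(model, "hf_quantizer", None)
is_trainable = quantizer is not None and quantizer.is_trainable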
3 changes: 2 additions & 1 deletion src/transformers/models/deta/modeling_deta.py
@@ -627,7 +627,8 @@ def __init__(self, config: DetaConfig, num_heads: int, n_points: int):
 
     def _reset_parameters(self):
         nn.init.constant_(self.sampling_offsets.weight.data, 0.0)
-        thetas = torch.arange(self.n_heads, dtype=torch.int64).float() * (2.0 * math.pi / self.n_heads)
+        default_dtype = torch.get_default_dtype()
+        thetas = torch.arange(self.n_heads, dtype=torch.int64).to(default_dtype) * (2.0 * math.pi / self.n_heads)
         grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
         grid_init = (
             (grid_init / grid_init.abs().max(-1, keepdim=True)[0])
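The point of this change: `.float()` hard-codes float32, while `.to(torch.get_default_dtype())` follows the process-wide default, so the initialization no longer mismatches when a caller has changed it. A standalone illustration (not DETA code):

import torch

torch.set_default_dtype(torch.float64)  # assumption: a caller changed the default

a = torch.arange(8, dtype=torch.int64).float()                        # always float32
b = torch.arange(8, dtype=torch.int64).to(torch.get_default_dtype())  # float64 here

print(a.dtype, b.dtype)  # torch.float32 torch.float64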
1 change: 0 additions & 1 deletion src/transformers/quantizers/quantizer_aqlm.py
@@ -77,7 +77,6 @@ def _process_model_before_weight_loading(
         model.config.quantization_config = self.quantization_config
 
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
-        model._is_quantized_training_enabled = False
         return model
 
     @property
6 changes: 6 additions & 0 deletions tests/models/bert_generation/test_modeling_bert_generation.py
@@ -305,6 +305,12 @@ def test_model_from_pretrained(self):
         model = BertGenerationEncoder.from_pretrained("google/bert_for_seq_generation_L-24_bbc_encoder")
         self.assertIsNotNone(model)
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 @require_torch
 class BertGenerationEncoderIntegrationTest(unittest.TestCase):
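The same skip recurs in the diffs below; for context, a rough sketch of the round trip the skipped test performs (illustrative, not the suite's exact code):

import tempfile
from transformers import BertGenerationEncoder

model = BertGenerationEncoder.from_pretrained("google/bert_for_seq_generation_L-24_bbc_encoder")
with tempfile.TemporaryDirectory() as tmp:
    model.save_pretrained(tmp)
    # low_cpu_mem_usage=True stages weights on the meta device; for these
    # models that path currently raises the NotImplementedError quoted above.
    reloaded = BertGenerationEncoder.from_pretrained(tmp, low_cpu_mem_usage=True)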
6 changes: 6 additions & 0 deletions tests/models/fsmt/test_modeling_fsmt.py
@@ -329,6 +329,12 @@ def test_tie_model_weights(self):
     def test_resize_embeddings_untied(self):
         pass
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 @require_torch
 class FSMTHeadTests(unittest.TestCase):
6 changes: 6 additions & 0 deletions tests/models/marian/test_modeling_marian.py
@@ -372,6 +372,12 @@ def test_training_gradient_checkpointing_use_reentrant(self):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 def assert_tensors_close(a, b, atol=1e-12, prefix=""):
     """If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
4 changes: 4 additions & 0 deletions tests/models/musicgen/test_modeling_musicgen.py
@@ -1144,6 +1144,10 @@ def test_greedy_generate_stereo_outputs(self):
 
         self.assertNotIn(config.pad_token_id, output_generate)
 
+    @unittest.skip("Fails with - TypeError: _weight_norm_interface() missing 1 required positional argument: 'dim'")
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 def get_bip_bip(bip_duration=0.125, duration=0.5, sample_rate=32000):
     """Produces a series of 'bip bip' sounds at a given frequency."""
12 changes: 12 additions & 0 deletions tests/models/reformer/test_modeling_reformer.py
@@ -687,6 +687,12 @@ def _check_hidden_states_for_generate(
     def test_left_padding_compatibility(self):
         pass
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 @require_torch
 class ReformerLSHAttnModelTest(
@@ -848,6 +854,12 @@ def test_past_key_values_format(self):
     def test_left_padding_compatibility(self):
         pass
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 @require_torch
 @require_sentencepiece
6 changes: 6 additions & 0 deletions tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py
@@ -515,6 +515,12 @@ def test_create_position_ids_from_inputs_embeds(self):
         self.assertEqual(position_ids.shape, expected_positions.shape)
         self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
 
+    @unittest.skip(
+        "Not currently compatible. Fails with - NotImplementedError: Cannot copy out of meta tensor; no data!"
+    )
+    def test_save_load_low_cpu_mem_usage(self):
+        pass
+
 
 @require_torch
 class XLMRobertaModelXLIntegrationTest(unittest.TestCase):

0 comments on commit 3993ed3
