diff --git a/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py index 02655f06e140b4..84672301034982 100644 --- a/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py +++ b/src/transformers/models/deta/convert_deta_resnet_to_pytorch.py @@ -268,11 +268,17 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub): outputs = model(pixel_values.to(device)) # verify logits - print("Logits:", outputs.logits[0, :3, :3]) - expected_logits = torch.tensor( - [[-7.3978, -2.5406, -4.1668], [-8.2684, -3.9933, -3.8096], [-7.0515, -3.7973, -5.8516]] - ) - expected_boxes = torch.tensor([[0.5043, 0.4973, 0.9998], [0.2542, 0.5489, 0.4748], [0.5490, 0.2765, 0.0570]]) + if model_name == "deta-resnet-50": + expected_logits = torch.tensor( + [[-7.3978, -2.5406, -4.1668], [-8.2684, -3.9933, -3.8096], [-7.0515, -3.7973, -5.8516]] + ) + expected_boxes = torch.tensor([[0.5043, 0.4973, 0.9998], [0.2542, 0.5489, 0.4748], [0.5490, 0.2765, 0.0570]]) + elif model_name == "deta-resnet-50-24-epochs": + expected_logits = torch.tensor( + [[-7.1688, -2.4857, -4.8669], [-7.8630, -3.8154, -4.2674], [-7.2730, -4.1865, -5.5323]] + ) + expected_boxes = torch.tensor([[0.5021, 0.4971, 0.9994], [0.2546, 0.5486, 0.4731], [0.1686, 0.1986, 0.2142]]) + assert torch.allclose(outputs.logits[0, :3, :3], expected_logits.to(device), atol=1e-4) assert torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes.to(device), atol=1e-4) print("Everything ok!") diff --git a/tests/models/deta/test_modeling_deta.py b/tests/models/deta/test_modeling_deta.py index f011cc1f8670ac..7f1b43b2af97a3 100644 --- a/tests/models/deta/test_modeling_deta.py +++ b/tests/models/deta/test_modeling_deta.py @@ -63,6 +63,7 @@ def __init__( num_feature_levels=4, encoder_n_points=2, decoder_n_points=6, + two_stage=False, ): self.parent = parent self.batch_size = batch_size @@ -83,6 +84,7 @@ def __init__( self.num_feature_levels = num_feature_levels self.encoder_n_points = encoder_n_points self.decoder_n_points = decoder_n_points + self.two_stage = two_stage # we also set the expected seq length for both encoder and decoder self.encoder_seq_length = ( @@ -130,6 +132,7 @@ def get_config(self): num_feature_levels=self.num_feature_levels, encoder_n_points=self.encoder_n_points, decoder_n_points=self.decoder_n_points, + two_stage=self.two_stage, ) def prepare_config_and_inputs_for_common(self): @@ -397,12 +400,15 @@ def test_forward_signature(self): expected_arg_names = ["pixel_values", "pixel_mask"] self.assertListEqual(arg_names[:1], expected_arg_names) + @unittest.skip(reason="Model doesn't use tied weights") + def test_tied_model_weights_key_ignore(self): + pass + def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: - print("Model class:", model_class) model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: