From 5e8d40c401df2b06b60e2f3dec13373c54d21c9d Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 22 Dec 2023 20:21:21 +0000 Subject: [PATCH 01/32] Enable instantiating model with pretrained backbone weights --- src/transformers/models/vitmatte/configuration_vitmatte.py | 3 --- tests/utils/test_backbone_utils.py | 4 ---- 2 files changed, 7 deletions(-) diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 275640d1d079a1..9984d89e1d6e4b 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -99,10 +99,7 @@ def __init__( if use_pretrained_backbone: raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: - raise ValueError("You can't specify both `backbone` and `backbone_config`.") - if backbone_config is None and backbone is None: logger.info("`backbone_config` is `None`. Initializing the config with the default `VitDet` backbone.") backbone_config = CONFIG_MAPPING["vitdet"](out_features=["stage4"]) diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index cd9a5a29a8c071..1b9f49b5cfb6e1 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -31,10 +31,6 @@ import torch from transformers import BertPreTrainedModel - - -class BackboneUtilsTester(unittest.TestCase): - def test_get_aligned_output_features_output_indices(self): stage_names = ["a", "b", "c"] # Defaults to last layer if both are None From 11ede698991169b4f27a7dfd294b97900bf11896 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Wed, 3 Jan 2024 20:40:24 +0000 Subject: [PATCH 02/32] Clarify pretrained import --- tests/utils/test_backbone_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 1b9f49b5cfb6e1..cd9a5a29a8c071 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -31,6 +31,10 @@ import torch from transformers import BertPreTrainedModel + + +class BackboneUtilsTester(unittest.TestCase): + def test_get_aligned_output_features_output_indices(self): stage_names = ["a", "b", "c"] # Defaults to last layer if both are None From e1067a4f2d413cce0b0c79fa6a24e2601072fcd7 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 15:17:26 +0000 Subject: [PATCH 03/32] Use load_backbone instead --- src/transformers/models/dpt/modeling_dpt.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index aad3330279f051..da9654c9d46178 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -1079,6 +1079,8 @@ def __init__(self, config): self.backbone = load_backbone(config) else: self.dpt = DPTModel(config, add_pooling_layer=False) + else: + self.backbone = load_backbone(config) # Neck self.neck = DPTNeck(config) From d19fc39715483ef6768205fb50a93acd8d3f459e Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 18:38:58 +0000 Subject: [PATCH 04/32] Add backbone_kwargs to config --- src/transformers/utils/backbone_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 14fcfe4a50a2d2..696edf6155c921 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -288,7 +288,7 @@ def to_dict(self): return output -def load_backbone(config): +def load_backbone(config, **kwargs): """ Loads the backbone model from a config object. From 59ba869751995e0653c2d887b398fecb5106dce3 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Wed, 31 Jan 2024 19:47:10 +0000 Subject: [PATCH 05/32] Fix up --- src/transformers/models/dpt/modeling_dpt.py | 2 -- src/transformers/models/vitmatte/configuration_vitmatte.py | 1 - 2 files changed, 3 deletions(-) diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index da9654c9d46178..aad3330279f051 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -1079,8 +1079,6 @@ def __init__(self, config): self.backbone = load_backbone(config) else: self.dpt = DPTModel(config, add_pooling_layer=False) - else: - self.backbone = load_backbone(config) # Neck self.neck = DPTNeck(config) diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 9984d89e1d6e4b..840813b9c66e4e 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -99,7 +99,6 @@ def __init__( if use_pretrained_backbone: raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: if backbone_config is None and backbone is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `VitDet` backbone.") backbone_config = CONFIG_MAPPING["vitdet"](out_features=["stage4"]) From 899c9bd3f8d8b04cffc04083e3d776757250a488 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:19:14 +0000 Subject: [PATCH 06/32] Add tests --- tests/utils/test_backbone_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index cd9a5a29a8c071..244f62950f048f 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -167,7 +167,7 @@ def test_load_backbone_from_checkpoint(self): ) backbone = load_backbone(config) # We can't know ahead of time the exact output features and indices, or the layer names before - # creating the timm model, so it defaults to the last layer (-1,) and has a different layer name + # creating the timm model, so it defalts to the last layer (-1,) and has a different layer name self.assertEqual(backbone.out_indices, (-1,)) self.assertEqual(backbone.out_features, ["layer4"]) self.assertIsInstance(backbone, TimmBackbone) From 6fc904d5a03fe67a95ffa0f03c26f82f1396c6e2 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:23:09 +0000 Subject: [PATCH 07/32] Tidy up --- src/transformers/models/vitmatte/configuration_vitmatte.py | 4 ++++ src/transformers/utils/backbone_utils.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 840813b9c66e4e..275640d1d079a1 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -99,6 +99,10 @@ def __init__( if use_pretrained_backbone: raise ValueError("Pretrained backbones are not supported yet.") + + if backbone_config is not None and backbone is not None: + raise ValueError("You can't specify both `backbone` and `backbone_config`.") + if backbone_config is None and backbone is None: logger.info("`backbone_config` is `None`. Initializing the config with the default `VitDet` backbone.") backbone_config = CONFIG_MAPPING["vitdet"](out_features=["stage4"]) diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 696edf6155c921..14fcfe4a50a2d2 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -288,7 +288,7 @@ def to_dict(self): return output -def load_backbone(config, **kwargs): +def load_backbone(config): """ Loads the backbone model from a config object. 
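Aside: the patches above and below converge on a single `load_backbone` helper that builds a backbone from the parent model's config. A minimal sketch of the intended call pattern, pieced together from the tests in this series, is shown here; the `microsoft/resnet-18` checkpoint and the printed attributes come from those tests and are illustrative assumptions about the in-progress API, not a guarantee of its final shape (running it also needs network access to the Hub).

    from transformers import MaskFormerConfig
    from transformers.utils.backbone_utils import load_backbone

    # Config of a model that wraps a backbone: only the checkpoint name is set,
    # so load_backbone resolves the backbone architecture from that checkpoint's config.
    config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_config=None)

    # Instantiates a ResNet backbone; weights stay randomly initialized unless
    # use_pretrained_backbone=True is also set on the config.
    backbone = load_backbone(config)
    print(type(backbone).__name__, backbone.out_features)
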
From ad201dd2edc960406b2acb9db0e8f2750400c7b5 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 22 Dec 2023 20:21:21 +0000 Subject: [PATCH 08/32] Enable instantiating model with pretrained backbone weights --- .../configuration_conditional_detr.py | 11 +-- .../configuration_deformable_detr.py | 11 +-- .../models/deta/configuration_deta.py | 16 +--- .../models/detr/configuration_detr.py | 13 +-- .../models/dpt/configuration_dpt.py | 23 +---- .../mask2former/configuration_mask2former.py | 20 +---- .../maskformer/configuration_maskformer.py | 19 +--- .../oneformer/configuration_oneformer.py | 19 +--- .../configuration_table_transformer.py | 13 +-- .../models/tvp/configuration_tvp.py | 16 +--- .../models/upernet/configuration_upernet.py | 16 +--- .../vit_hybrid/configuration_vit_hybrid.py | 16 +--- .../models/vitmatte/configuration_vitmatte.py | 16 +--- src/transformers/utils/backbone_utils.py | 53 ++++------- tests/utils/test_backbone_utils.py | 90 +------------------ utils/check_config_attributes.py | 5 -- 16 files changed, 53 insertions(+), 304 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 945e5edb32ad30..163865f7332343 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -94,10 +94,8 @@ class ConditionalDetrConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): - Whether to use pretrained weights for the backbone. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -181,11 +179,6 @@ def __init__( focal_alpha=0.25, **kwargs, ): - if not use_timm_backbone and use_pretrained_backbone: - raise ValueError( - "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" - ) - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 6d32f6220df586..6a9cb70b4221e7 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -87,10 +87,8 @@ class DeformableDetrConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. 
use_pretrained_backbone (`bool`, *optional*, defaults to `True`): - Whether to use pretrained weights for the backbone. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -198,11 +196,6 @@ def __init__( disable_custom_kernels=False, **kwargs, ): - if not use_timm_backbone and use_pretrained_backbone: - raise ValueError( - "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" - ) - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index 1604bc56e6396d..124fe9d94d2b59 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -44,13 +44,8 @@ class DetaConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. 
@@ -154,8 +149,6 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, num_queries=900, max_position_embeddings=2048, encoder_layers=6, @@ -196,9 +189,6 @@ def __init__( disable_custom_kernels=True, **kwargs, ): - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -217,8 +207,6 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.num_queries = num_queries self.max_position_embeddings = max_position_embeddings self.d_model = d_model diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 9b9b5afacd0b7f..56fb76a27728b7 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -94,11 +94,9 @@ class DetrConfig(PretrainedConfig): Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. - use_pretrained_backbone (`bool`, *optional*, `True`): - Whether to use pretrained weights for the backbone. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + use_pretrained_backbone (`bool`, *optional*, `False`): + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -179,11 +177,6 @@ def __init__( eos_coefficient=0.1, **kwargs, ): - if not use_timm_backbone and use_pretrained_backbone: - raise ValueError( - "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" - ) - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index 9bdc8d1ef0affb..3b533b07b585e0 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -114,13 +114,8 @@ class DPTConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, defaults to `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. 
- backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. Example: @@ -173,8 +168,6 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, **kwargs, ): super().__init__(**kwargs) @@ -182,8 +175,8 @@ def __init__( self.hidden_size = hidden_size self.is_hybrid = is_hybrid - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: + raise ValueError("You can't specify both `backbone` and `backbone_config`.") use_autobackbone = False if self.is_hybrid: @@ -229,16 +222,8 @@ def __init__( self.backbone_featmap_shape = None self.neck_ignore_stages = [] - if use_autobackbone and backbone_config is not None and backbone is not None: - raise ValueError("You can't specify both `backbone` and `backbone_config`.") - - if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: - raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.num_hidden_layers = None if use_autobackbone else num_hidden_layers self.num_attention_heads = None if use_autobackbone else num_attention_heads self.intermediate_size = None if use_autobackbone else intermediate_size diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index f0d13b8e030ed1..92d472591cf175 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -46,13 +46,8 @@ class Mask2FormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. 
mask_feature_size (`int`, *optional*, defaults to 256): @@ -160,15 +155,10 @@ def __init__( use_auxiliary_loss: bool = True, feature_strides: List[int] = [4, 8, 16, 32], output_auxiliary_logits: bool = None, - backbone: Optional[str] = None, - use_pretrained_backbone: bool = False, - use_timm_backbone: bool = False, - backbone_kwargs: Optional[Dict] = None, + backbone=None, + use_pretrained_backbone=False, **kwargs, ): - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -233,8 +223,6 @@ def __init__( self.num_hidden_layers = decoder_layers self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 653350ca056dda..e00a622ad2882b 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -56,13 +56,8 @@ class MaskFormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. 
@@ -122,19 +117,11 @@ def __init__( output_auxiliary_logits: Optional[bool] = None, backbone: Optional[str] = None, use_pretrained_backbone: bool = False, - use_timm_backbone: bool = False, - backbone_kwargs: Optional[Dict] = None, **kwargs, ): - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") - if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: - raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - if backbone_config is None and backbone is None: # fall back to https://huggingface.co/microsoft/swin-base-patch4-window12-384-in22k backbone_config = SwinConfig( @@ -198,8 +185,6 @@ def __init__( self.num_hidden_layers = self.decoder_config.num_hidden_layers self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) @classmethod diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index 1cbd2ab7dbc18f..b8f837462c1c31 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -45,13 +45,8 @@ class OneFormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, defaults to `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. 
num_queries (`int`, *optional*, defaults to 150): @@ -154,8 +149,6 @@ def __init__( backbone_config: Optional[Dict] = None, backbone: Optional[str] = None, use_pretrained_backbone: bool = False, - use_timm_backbone: bool = False, - backbone_kwargs: Optional[Dict] = None, ignore_value: int = 255, num_queries: int = 150, no_object_weight: int = 0.1, @@ -198,9 +191,6 @@ def __init__( common_stride: int = 4, **kwargs, ): - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -223,14 +213,9 @@ def __init__( config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) - if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: - raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.ignore_value = ignore_value self.num_queries = num_queries self.no_object_weight = no_object_weight diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 9a2ff6bbab3b24..42457cd63f8d6e 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -93,11 +93,9 @@ class TableTransformerConfig(PretrainedConfig): Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. - use_pretrained_backbone (`bool`, *optional*, `True`): - Whether to use pretrained weights for the backbone. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + use_pretrained_backbone (`bool`, *optional*, `False`): + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -179,11 +177,6 @@ def __init__( eos_coefficient=0.1, **kwargs, ): - if not use_timm_backbone and use_pretrained_backbone: - raise ValueError( - "Loading pretrained backbone weights from the transformers library is not supported yet. 
`use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" - ) - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 85b7ac6a41cbcc..ccdc54ae07747b 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -46,13 +46,8 @@ class TvpConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, defaults to `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -107,8 +102,6 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, distance_loss_weight=1.0, duration_loss_weight=0.1, visual_prompter_type="framepad", @@ -132,9 +125,6 @@ def __init__( **kwargs, ): super().__init__(**kwargs) - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -152,8 +142,6 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.distance_loss_weight = distance_loss_weight self.duration_loss_weight = duration_loss_weight self.visual_prompter_type = visual_prompter_type diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 609818c80d17b7..664ef47a5bccf9 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -41,13 +41,8 @@ class UperNetConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. 
Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. initializer_range (`float`, *optional*, defaults to 0.02): @@ -89,8 +84,6 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, hidden_size=512, initializer_range=0.02, pool_scales=[1, 2, 3, 6], @@ -104,9 +97,6 @@ def __init__( **kwargs, ): super().__init__(**kwargs) - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -124,8 +114,6 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.initializer_range = initializer_range self.pool_scales = pool_scales diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 8a8a808ec60d05..2b9b0074ac190a 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -45,13 +45,8 @@ class ViTHybridConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, defaults to `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. 
num_hidden_layers (`int`, *optional*, defaults to 12): @@ -104,8 +99,6 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -123,9 +116,6 @@ def __init__( **kwargs, ): super().__init__(**kwargs) - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -156,8 +146,6 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 275640d1d079a1..9a42a336d14370 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -46,13 +46,8 @@ class VitMatteConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, defaults to `False`): - Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers - library. - backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to AutoBackbone when loading from a checkpoint - e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. + Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` + when this is `False`. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. 
batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -86,8 +81,6 @@ def __init__( backbone_config: PretrainedConfig = None, backbone=None, use_pretrained_backbone=False, - use_timm_backbone=False, - backbone_kwargs=None, hidden_size: int = 384, batch_norm_eps: float = 1e-5, initializer_range: float = 0.02, @@ -97,9 +90,6 @@ def __init__( ): super().__init__(**kwargs) - if use_pretrained_backbone: - raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") @@ -117,8 +107,6 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs self.batch_norm_eps = batch_norm_eps self.hidden_size = hidden_size self.initializer_range = initializer_range diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 14fcfe4a50a2d2..e7ae99d40e2d0a 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -304,47 +304,28 @@ def load_backbone(config): use_timm_backbone = getattr(config, "use_timm_backbone", None) use_pretrained_backbone = getattr(config, "use_pretrained_backbone", None) backbone_checkpoint = getattr(config, "backbone", None) - backbone_kwargs = getattr(config, "backbone_kwargs", None) - - backbone_kwargs = {} if backbone_kwargs is None else backbone_kwargs - - if backbone_kwargs and backbone_config is not None: - raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") # If there is a backbone_config and a backbone checkpoint, and use_pretrained_backbone=False then the desired # behaviour is ill-defined: do you want to load from the checkpoint's config or the backbone_config? if backbone_config is not None and backbone_checkpoint is not None and use_pretrained_backbone is not None: raise ValueError("Cannot specify both config.backbone_config and config.backbone") - # If any of thhe following are set, then the config passed in is from a model which contains a backbone. - if ( - backbone_config is None - and use_timm_backbone is None - and backbone_checkpoint is None - and backbone_checkpoint is None - ): - return AutoBackbone.from_config(config=config, **backbone_kwargs) - - # config from the parent model that has a backbone - if use_timm_backbone: - if backbone_checkpoint is None: - raise ValueError("config.backbone must be set if use_timm_backbone is True") - # Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone - # to determine whether to load the pretrained weights. - backbone = AutoBackbone.from_pretrained( - backbone_checkpoint, - use_timm_backbone=use_timm_backbone, - use_pretrained_backbone=use_pretrained_backbone, - **backbone_kwargs, - ) - elif use_pretrained_backbone: - if backbone_checkpoint is None: - raise ValueError("config.backbone must be set if use_pretrained_backbone is True") - backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **backbone_kwargs) + if backbone_config is not None or use_timm_backbone or backbone_checkpoint is not None: + # This is a config from the parent model the has a backbone. This contains the pretrained backbone checkpoint + # if specified. + # By default, most models don't have use_pretrained_backbone set. 
+ if use_pretrained_backbone: + if backbone_checkpoint is None: + raise ValueError("config.backbone must be set if use_pretrained_backbone is True") + backbone = AutoBackbone.from_pretrained( + backbone_checkpoint, + use_timm_backbone=getattr(config, "use_timm_backbone", False), + ) + else: + if backbone_config is None: + backbone_config = AutoConfig.from_pretrained(backbone_checkpoint) + backbone = AutoBackbone.from_config(config=backbone_config) else: - if backbone_config is None and backbone_checkpoint is None: - raise ValueError("Either config.backbone_config or config.backbone must be set") - if backbone_config is None: - backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **backbone_kwargs) - backbone = AutoBackbone.from_config(config=backbone_config) + # This is a backbone config, so we just initialize the backbone model with random weights directly. + backbone = AutoBackbone.from_config(config=config) return backbone diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 244f62950f048f..b289f6ada6d992 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -16,7 +16,7 @@ import pytest -from transformers import DetrConfig, MaskFormerConfig, ResNetBackbone, ResNetConfig, TimmBackbone +from transformers import MaskFormerConfig from transformers.testing_utils import require_torch, slow from transformers.utils.backbone_utils import ( BackboneMixin, @@ -30,7 +30,8 @@ if is_torch_available(): import torch - from transformers import BertPreTrainedModel + # from transformers import PretrainedModel + from transformers import BertPreTrainedModel as PretrainedModel class BackboneUtilsTester(unittest.TestCase): @@ -137,65 +138,6 @@ def test_backbone_mixin(self): self.assertEqual(backbone.out_features, ["a", "c"]) self.assertEqual(backbone.out_indices, [-3, -1]) - @slow - @require_torch - def test_load_backbone_from_config(self): - """ - Test that load_backbone correctly loads a backbone from a backbone config. - """ - config = MaskFormerConfig(backbone_config=ResNetConfig(out_indices=(0, 2))) - backbone = load_backbone(config) - self.assertEqual(backbone.out_features, ["stem", "stage2"]) - self.assertEqual(backbone.out_indices, (0, 2)) - self.assertIsInstance(backbone, ResNetBackbone) - - @slow - @require_torch - def test_load_backbone_from_checkpoint(self): - """ - Test that load_backbone correctly loads a backbone from a checkpoint. - """ - config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_config=None) - backbone = load_backbone(config) - self.assertEqual(backbone.out_indices, [4]) - self.assertEqual(backbone.out_features, ["stage4"]) - self.assertIsInstance(backbone, ResNetBackbone) - - config = MaskFormerConfig( - backbone="resnet18", - use_timm_backbone=True, - ) - backbone = load_backbone(config) - # We can't know ahead of time the exact output features and indices, or the layer names before - # creating the timm model, so it defalts to the last layer (-1,) and has a different layer name - self.assertEqual(backbone.out_indices, (-1,)) - self.assertEqual(backbone.out_features, ["layer4"]) - self.assertIsInstance(backbone, TimmBackbone) - - @slow - @require_torch - def test_load_backbone_backbone_kwargs(self): - """ - Test that load_backbone correctly configures the loaded backbone with the provided kwargs. 
- """ - config = MaskFormerConfig(backbone="resnet18", use_timm_backbone=True, backbone_kwargs={"out_indices": (0, 1)}) - backbone = load_backbone(config) - self.assertEqual(backbone.out_indices, (0, 1)) - self.assertIsInstance(backbone, TimmBackbone) - - config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_kwargs={"out_indices": (0, 2)}) - backbone = load_backbone(config) - self.assertEqual(backbone.out_indices, (0, 2)) - self.assertIsInstance(backbone, ResNetBackbone) - - # Check can't be passed with a backone config - with pytest.raises(ValueError): - config = MaskFormerConfig( - backbone="microsoft/resnet-18", - backbone_config=ResNetConfig(out_indices=(0, 2)), - backbone_kwargs={"out_indices": (0, 1)}, - ) - @slow @require_torch def test_load_backbone_in_new_model(self): @@ -203,8 +145,7 @@ def test_load_backbone_in_new_model(self): Tests that new model can be created, with its weights instantiated and pretrained backbone weights loaded. """ - # Inherit from PreTrainedModel to ensure that the weights are initialized - class NewModel(BertPreTrainedModel): + class NewModel(PretrainedModel): def __init__(self, config): super().__init__(config) self.backbone = load_backbone(config) @@ -244,26 +185,3 @@ def get_equal_not_equal_weights(model_0, model_1): self.assertEqual(len(equal_weights), 20) # Linear layers are still initialized randomly self.assertEqual(len(not_equal_weights), 4) - - # Check loading in timm backbone - config = DetrConfig(use_pretrained_backbone=False, backbone="resnet18", use_timm_backbone=True) - model_0 = NewModel(config) - model_1 = NewModel(config) - equal_weights, not_equal_weights = get_equal_not_equal_weights(model_0, model_1) - - # Norm layers are always initialized with the same weights - equal_weights = [w for w in equal_weights if "bn" not in w and "downsample.1" not in w] - self.assertEqual(len(equal_weights), 0) - self.assertEqual(len(not_equal_weights), 24) - - # Now we create a new model with backbone weights that are pretrained - config.use_pretrained_backbone = True - model_0 = NewModel(config) - model_1 = NewModel(config) - equal_weights, not_equal_weights = get_equal_not_equal_weights(model_0, model_1) - - # Norm layers are always initialized with the same weights - equal_weights = [w for w in equal_weights if "bn" not in w and "downsample.1" not in w] - self.assertEqual(len(equal_weights), 20) - # Linear layers are still initialized randomly - self.assertEqual(len(not_equal_weights), 4) diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index f631c59b75d40e..cf4348fb530388 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -234,12 +234,7 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s "out_features", "out_indices", "sampling_rate", - # backbone related arguments passed to load_backbone "use_pretrained_backbone", - "backbone", - "backbone_config", - "use_timm_backbone", - "backbone_kwargs", ] attributes_used_in_generation = ["encoder_no_repeat_ngram_size"] From 589007d53f77f45778159b2b128d172cc1c7ba2d Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Tue, 2 Jan 2024 17:33:32 +0000 Subject: [PATCH 09/32] Update tests so backbone checkpoint isn't passed in --- tests/models/conditional_detr/test_modeling_conditional_detr.py | 1 - tests/models/deformable_detr/test_modeling_deformable_detr.py | 2 +- tests/models/detr/test_modeling_detr.py | 1 - 
.../models/table_transformer/test_modeling_table_transformer.py | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index d1152ed8622b9c..23424ba5e9f4ab 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -135,7 +135,6 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, - use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 7a83c4f1ed80a8..9ff1f5c24b1173 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -152,7 +152,7 @@ def get_config(self): use_timm_backbone=False, backbone=None, backbone_config=resnet_config, - use_pretrained_backbone=False, + backbone=None, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index 59b071e031aa8a..bc8b96b2cf2fb0 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -131,7 +131,6 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, - use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 79da1d191063ab..361a916efdd8fb 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -132,7 +132,6 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, - use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): From e5930076544d7448e5896fed0155b8fc9f6621e7 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Wed, 3 Jan 2024 20:40:24 +0000 Subject: [PATCH 10/32] Clarify pretrained import --- tests/utils/test_backbone_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index b289f6ada6d992..3326d12651d8d3 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -30,8 +30,7 @@ if is_torch_available(): import torch - # from transformers import PretrainedModel - from transformers import BertPreTrainedModel as PretrainedModel + from transformers import BertPreTrainedModel class BackboneUtilsTester(unittest.TestCase): @@ -145,7 +144,8 @@ def test_load_backbone_in_new_model(self): Tests that new model can be created, with its weights instantiated and pretrained backbone weights loaded. 
""" - class NewModel(PretrainedModel): + # Inherit from PreTrainedModel to ensure that the weights are initialized + class NewModel(BertPreTrainedModel): def __init__(self, config): super().__init__(config) self.backbone = load_backbone(config) From 4a601dc727430f06caa2979f02bf56b4b800b0ce Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 10:25:16 +0000 Subject: [PATCH 11/32] Update configs - docs and validation check --- .../conditional_detr/configuration_conditional_detr.py | 8 ++++++-- .../deformable_detr/configuration_deformable_detr.py | 8 ++++++-- src/transformers/models/deta/configuration_deta.py | 6 ++++-- src/transformers/models/detr/configuration_detr.py | 10 +++++++--- src/transformers/models/dpt/configuration_dpt.py | 6 ++++-- .../models/mask2former/configuration_mask2former.py | 6 ++++-- .../models/maskformer/configuration_maskformer.py | 6 ++++-- .../models/oneformer/configuration_oneformer.py | 6 ++++-- .../configuration_table_transformer.py | 10 +++++++--- src/transformers/models/tvp/configuration_tvp.py | 6 ++++-- .../models/upernet/configuration_upernet.py | 6 ++++-- .../models/vit_hybrid/configuration_vit_hybrid.py | 6 ++++-- .../models/vitmatte/configuration_vitmatte.py | 6 ++++-- 13 files changed, 62 insertions(+), 28 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 163865f7332343..8224388e001841 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -94,8 +94,7 @@ class ConditionalDetrConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -179,6 +178,11 @@ def __init__( focal_alpha=0.25, **kwargs, ): + if not use_timm_backbone and use_pretrained_backbone: + raise ValueError( + "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + ) + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 6a9cb70b4221e7..24fa0b4326f8c0 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -87,8 +87,7 @@ class DeformableDetrConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. 
use_pretrained_backbone (`bool`, *optional*, defaults to `True`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -196,6 +195,11 @@ def __init__( disable_custom_kernels=False, **kwargs, ): + if not use_timm_backbone and use_pretrained_backbone: + raise ValueError( + "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + ) + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index 124fe9d94d2b59..83bda6d8183650 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -44,8 +44,7 @@ class DetaConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. @@ -189,6 +188,9 @@ def __init__( disable_custom_kernels=True, **kwargs, ): + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 56fb76a27728b7..9767323a05fa67 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -94,9 +94,8 @@ class DetrConfig(PretrainedConfig): Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. - use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + use_pretrained_backbone (`bool`, *optional*, `True`): + Whether to use pretrained weights for the backbone. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. 
@@ -177,6 +176,11 @@ def __init__( eos_coefficient=0.1, **kwargs, ): + if not use_timm_backbone and use_pretrained_backbone: + raise ValueError( + "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + ) + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index 3b533b07b585e0..7771c3a0d399c9 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -114,8 +114,7 @@ class DPTConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. Example: @@ -175,6 +174,9 @@ def __init__( self.hidden_size = hidden_size self.is_hybrid = is_hybrid + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 92d472591cf175..9e278d8994cd0a 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -46,8 +46,7 @@ class Mask2FormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. mask_feature_size (`int`, *optional*, defaults to 256): @@ -159,6 +158,9 @@ def __init__( use_pretrained_backbone=False, **kwargs, ): + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index e00a622ad2882b..e0ba4d4062b341 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -56,8 +56,7 @@ class MaskFormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. 
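The configs that only support backbones from the transformers library get a blunter guard, shown above for DPT and Mask2Former. A short sketch of what that means for users (hypothetical usage):

```python
from transformers import Mask2FormerConfig

# Pretrained backbone weights are rejected outright for these models at this point in the series
try:
    Mask2FormerConfig(use_pretrained_backbone=True)
except ValueError as err:
    print(err)  # Pretrained backbones are not supported yet.

# The default path still works: a randomly initialised Swin backbone config
config = Mask2FormerConfig()
```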
use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. @@ -119,6 +118,9 @@ def __init__( use_pretrained_backbone: bool = False, **kwargs, ): + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index b8f837462c1c31..d249a0f8337502 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -45,8 +45,7 @@ class OneFormerConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. num_queries (`int`, *optional*, defaults to 150): @@ -191,6 +190,9 @@ def __init__( common_stride: int = 4, **kwargs, ): + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 42457cd63f8d6e..363bf28122bd43 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -93,9 +93,8 @@ class TableTransformerConfig(PretrainedConfig): Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. - use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + use_pretrained_backbone (`bool`, *optional*, `True`): + Whether to use pretrained weights for the backbone. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -177,6 +176,11 @@ def __init__( eos_coefficient=0.1, **kwargs, ): + if not use_timm_backbone and use_pretrained_backbone: + raise ValueError( + "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." 
+ ) + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index ccdc54ae07747b..23d8b05c117b96 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -46,8 +46,7 @@ class TvpConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -125,6 +124,9 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 664ef47a5bccf9..2d54c821b8c8d5 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -41,8 +41,7 @@ class UperNetConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. initializer_range (`float`, *optional*, defaults to 0.02): @@ -97,6 +96,9 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 2b9b0074ac190a..71cd4b2b3da247 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -45,8 +45,7 @@ class ViTHybridConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. 
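Every config touched here also keeps the existing rule that a named `backbone` and an explicit `backbone_config` are mutually exclusive. A quick sketch (hypothetical usage; the checkpoint name is only illustrative and the error fires before anything is downloaded):

```python
from transformers import ConvNextConfig, UperNetConfig

try:
    UperNetConfig(backbone="facebook/convnext-tiny-224", backbone_config=ConvNextConfig())
except ValueError as err:
    print(err)  # You can't specify both `backbone` and `backbone_config`.
```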
hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. num_hidden_layers (`int`, *optional*, defaults to 12): @@ -116,6 +115,9 @@ def __init__( **kwargs, ): super().__init__(**kwargs) + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 9a42a336d14370..b90a0f29e74798 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -46,8 +46,7 @@ class VitMatteConfig(PretrainedConfig): will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone` is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): - Whether to use pretrained weights for the backbone. You cannot specify both `backbone` and `backbone_config` - when this is `False`. + Whether to use pretrained weights for the backbone. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -90,6 +89,9 @@ def __init__( ): super().__init__(**kwargs) + if use_pretrained_backbone: + raise ValueError("Pretrained backbones are not supported yet.") + if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") From 4506d08783841f7f3f62288a954f74d2bf772d21 Mon Sep 17 00:00:00 2001 From: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 10:25:34 +0000 Subject: [PATCH 12/32] Update src/transformers/utils/backbone_utils.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> --- src/transformers/utils/backbone_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index e7ae99d40e2d0a..566d19ff26190c 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -311,7 +311,7 @@ def load_backbone(config): raise ValueError("Cannot specify both config.backbone_config and config.backbone") if backbone_config is not None or use_timm_backbone or backbone_checkpoint is not None: - # This is a config from the parent model the has a backbone. This contains the pretrained backbone checkpoint + # This is a config from the parent model that has a backbone. This contains the pretrained backbone checkpoint # if specified. # By default, most models don't have use_pretrained_backbone set. 
if use_pretrained_backbone: From cbc0f6ab443c0f16b6cf8afffbe20f50c2bdd8b9 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 10:50:37 +0000 Subject: [PATCH 13/32] Clarify exception message --- .../models/conditional_detr/configuration_conditional_detr.py | 2 +- .../models/deformable_detr/configuration_deformable_detr.py | 2 +- src/transformers/models/detr/configuration_detr.py | 2 +- .../models/table_transformer/configuration_table_transformer.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 8224388e001841..06cb33bab74645 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -180,7 +180,7 @@ def __init__( ): if not use_timm_backbone and use_pretrained_backbone: raise ValueError( - "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" ) if backbone_config is not None and backbone is not None: diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 24fa0b4326f8c0..994b7b77e5c819 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -197,7 +197,7 @@ def __init__( ): if not use_timm_backbone and use_pretrained_backbone: raise ValueError( - "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" ) if backbone_config is not None and backbone is not None: diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 9767323a05fa67..b6834cd940e041 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -178,7 +178,7 @@ def __init__( ): if not use_timm_backbone and use_pretrained_backbone: raise ValueError( - "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + "Loading pretrained backbone weights from the transformers library is not supported yet. `use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" ) if backbone_config is not None and backbone is not None: diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 363bf28122bd43..613863206bd416 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -178,7 +178,7 @@ def __init__( ): if not use_timm_backbone and use_pretrained_backbone: raise ValueError( - "It is not possible yet to use pretrained weights without `use_timm_backbone` set to `True`." + "Loading pretrained backbone weights from the transformers library is not supported yet. 
`use_timm_backbone` must be set to `True` when `use_pretrained_backbone=True`" ) if backbone_config is not None and backbone is not None: From 89804ae6ff2eb36dd11772a5fb9192e559aed15a Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:07:15 +0000 Subject: [PATCH 14/32] Update config init in tests --- tests/models/conditional_detr/test_modeling_conditional_detr.py | 1 + tests/models/deformable_detr/test_modeling_deformable_detr.py | 1 + tests/models/detr/test_modeling_detr.py | 1 + .../models/table_transformer/test_modeling_table_transformer.py | 1 + 4 files changed, 4 insertions(+) diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index 23424ba5e9f4ab..d1152ed8622b9c 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -135,6 +135,7 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, + use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 9ff1f5c24b1173..8487b5dc694d54 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -153,6 +153,7 @@ def get_config(self): backbone=None, backbone_config=resnet_config, backbone=None, + use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index bc8b96b2cf2fb0..59b071e031aa8a 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -131,6 +131,7 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, + use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 361a916efdd8fb..79da1d191063ab 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -132,6 +132,7 @@ def get_config(self): use_timm_backbone=False, backbone_config=resnet_config, backbone=None, + use_pretrained_backbone=False, ) def prepare_config_and_inputs_for_common(self): From 6300aef748a9a034ac4249240370535efb3c34a3 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 14:28:50 +0000 Subject: [PATCH 15/32] Add test for when use_timm_backbone=True --- src/transformers/utils/backbone_utils.py | 44 +++++++++++++++--------- tests/utils/test_backbone_utils.py | 25 +++++++++++++- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 566d19ff26190c..22c35c3f9b6e06 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -310,22 +310,32 @@ def load_backbone(config): if backbone_config is not None and backbone_checkpoint is not None and use_pretrained_backbone is not None: raise ValueError("Cannot specify both config.backbone_config and config.backbone") - if backbone_config 
is not None or use_timm_backbone or backbone_checkpoint is not None: - # This is a config from the parent model that has a backbone. This contains the pretrained backbone checkpoint - # if specified. - # By default, most models don't have use_pretrained_backbone set. - if use_pretrained_backbone: - if backbone_checkpoint is None: - raise ValueError("config.backbone must be set if use_pretrained_backbone is True") - backbone = AutoBackbone.from_pretrained( - backbone_checkpoint, - use_timm_backbone=getattr(config, "use_timm_backbone", False), - ) - else: - if backbone_config is None: - backbone_config = AutoConfig.from_pretrained(backbone_checkpoint) - backbone = AutoBackbone.from_config(config=backbone_config) + # If any of the following are set, then the config passed in is from a model which contains a backbone. + if ( + backbone_config is None + and use_timm_backbone is None + and backbone_checkpoint is None + and backbone_checkpoint is None + ): + return AutoBackbone.from_config(config=config) + + # config from the parent model that has a backbone + if use_timm_backbone: + if backbone_checkpoint is None: + raise ValueError("config.backbone must be set if use_timm_backbone is True") + # Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone + # to determine whether to load the pretrained weights. + backbone = AutoBackbone.from_pretrained( + backbone_checkpoint, use_timm_backbone=use_timm_backbone, use_pretrained_backbone=use_pretrained_backbone + ) + elif use_pretrained_backbone: + if backbone_checkpoint is None: + raise ValueError("config.backbone must be set if use_pretrained_backbone is True") + backbone = AutoBackbone.from_pretrained(backbone_checkpoint) else: - # This is a backbone config, so we just initialize the backbone model with random weights directly.
- backbone = AutoBackbone.from_config(config=config) + if backbone_config is None and backbone_checkpoint is None: + raise ValueError("Either config.backbone_config or config.backbone must be set") + if backbone_config is None: + backbone_config = AutoConfig.from_pretrained(backbone_checkpoint) + backbone = AutoBackbone.from_config(config=backbone_config) return backbone diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 3326d12651d8d3..0c3ff4866e8379 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -16,7 +16,7 @@ import pytest -from transformers import MaskFormerConfig +from transformers import DetrConfig, MaskFormerConfig from transformers.testing_utils import require_torch, slow from transformers.utils.backbone_utils import ( BackboneMixin, @@ -185,3 +185,26 @@ def get_equal_not_equal_weights(model_0, model_1): self.assertEqual(len(equal_weights), 20) # Linear layers are still initialized randomly self.assertEqual(len(not_equal_weights), 4) + + # Check loading in timm backbone + config = DetrConfig(use_pretrained_backbone=False, backbone="resnet18", use_timm_backbone=True) + model_0 = NewModel(config) + model_1 = NewModel(config) + equal_weights, not_equal_weights = get_equal_not_equal_weights(model_0, model_1) + + # Norm layers are always initialized with the same weights + equal_weights = [w for w in equal_weights if "bn" not in w and "downsample.1" not in w] + self.assertEqual(len(equal_weights), 0) + self.assertEqual(len(not_equal_weights), 24) + + # Now we create a new model with backbone weights that are pretrained + config.use_pretrained_backbone = True + model_0 = NewModel(config) + model_1 = NewModel(config) + equal_weights, not_equal_weights = get_equal_not_equal_weights(model_0, model_1) + + # Norm layers are always initialized with the same weights + equal_weights = [w for w in equal_weights if "bn" not in w and "downsample.1" not in w] + self.assertEqual(len(equal_weights), 20) + # Linear layers are still initialized randomly + self.assertEqual(len(not_equal_weights), 4) From 6870317a7b1d4a1eeee501f55b8975e2c1aae6f2 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 15:17:26 +0000 Subject: [PATCH 16/32] Use load_backbone instead --- src/transformers/models/dpt/modeling_dpt.py | 6 +++--- utils/check_config_attributes.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index aad3330279f051..a3899f86ff8457 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -1075,10 +1075,10 @@ def __init__(self, config): super().__init__(config) self.backbone = None - if config.backbone_config is not None and config.is_hybrid is False: - self.backbone = load_backbone(config) - else: + if config.is_hybrid: self.dpt = DPTModel(config, add_pooling_layer=False) + else: + self.backbone = load_backbone(config) # Neck self.neck = DPTNeck(config) diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index cf4348fb530388..f97fd2ef31f5df 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -235,6 +235,8 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s "out_indices", "sampling_rate", "use_pretrained_backbone", + "backbone", + "backbone_config", ] attributes_used_in_generation = 
["encoder_no_repeat_ngram_size"] From aa3376c50ec1fa34d73b98bf6e95237ea71f8429 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 18:23:38 +0000 Subject: [PATCH 17/32] Add use_timm_backbone to the model configs --- src/transformers/models/deta/configuration_deta.py | 5 +++++ src/transformers/models/dpt/configuration_dpt.py | 5 +++++ .../models/mask2former/configuration_mask2former.py | 5 +++++ .../models/maskformer/configuration_maskformer.py | 5 +++++ src/transformers/models/oneformer/configuration_oneformer.py | 5 +++++ src/transformers/models/tvp/configuration_tvp.py | 4 ++++ src/transformers/models/upernet/configuration_upernet.py | 5 +++++ .../models/vit_hybrid/configuration_vit_hybrid.py | 5 +++++ src/transformers/models/vitmatte/configuration_vitmatte.py | 5 +++++ utils/check_config_attributes.py | 2 ++ 10 files changed, 46 insertions(+) diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index 83bda6d8183650..4dccce1c82d6e1 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -45,6 +45,9 @@ class DetaConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. @@ -148,6 +151,7 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, num_queries=900, max_position_embeddings=2048, encoder_layers=6, @@ -209,6 +213,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.num_queries = num_queries self.max_position_embeddings = max_position_embeddings self.d_model = d_model diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index 7771c3a0d399c9..4d6ac65767c8ce 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -115,6 +115,9 @@ class DPTConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. 
Example: @@ -167,6 +170,7 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, **kwargs, ): super().__init__(**kwargs) @@ -226,6 +230,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.num_hidden_layers = None if use_autobackbone else num_hidden_layers self.num_attention_heads = None if use_autobackbone else num_attention_heads self.intermediate_size = None if use_autobackbone else intermediate_size diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 9e278d8994cd0a..a6255df0e1a868 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -47,6 +47,9 @@ class Mask2FormerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. mask_feature_size (`int`, *optional*, defaults to 256): @@ -156,6 +159,7 @@ def __init__( output_auxiliary_logits: bool = None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, **kwargs, ): if use_pretrained_backbone: @@ -225,6 +229,7 @@ def __init__( self.num_hidden_layers = decoder_layers self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone super().__init__(**kwargs) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index e0ba4d4062b341..85034876f744d9 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -57,6 +57,9 @@ class MaskFormerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. 
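The `use_timm_backbone` flag documented above is what the reworked `load_backbone` from earlier in this series keys on. A simplified restatement of its decision order, returning labels instead of building models (a sketch, not the literal implementation):

```python
def describe_backbone_path(backbone_config, backbone, use_timm_backbone, use_pretrained_backbone):
    """Mirror the branch `load_backbone` takes for a given set of config attributes."""
    # None of the backbone attributes are present: the config *is* a backbone config itself.
    if backbone_config is None and use_timm_backbone is None and backbone is None:
        return "AutoBackbone.from_config(config)"
    if use_timm_backbone:
        # timm path; pretrained-ness is forwarded so timm decides whether to download weights
        return "AutoBackbone.from_pretrained(backbone, use_timm_backbone=True, use_pretrained_backbone=...)"
    if use_pretrained_backbone:
        return "AutoBackbone.from_pretrained(backbone)"
    # random init: resolve a backbone config first if only a checkpoint name was given
    return "AutoBackbone.from_config(backbone_config or AutoConfig.from_pretrained(backbone))"
```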
@@ -116,6 +119,7 @@ def __init__( output_auxiliary_logits: Optional[bool] = None, backbone: Optional[str] = None, use_pretrained_backbone: bool = False, + use_timm_backbone: bool = False, **kwargs, ): if use_pretrained_backbone: @@ -187,6 +191,7 @@ def __init__( self.num_hidden_layers = self.decoder_config.num_hidden_layers self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone super().__init__(**kwargs) @classmethod diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index d249a0f8337502..a6e5cd2a9d1c2a 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -46,6 +46,9 @@ class OneFormerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. num_queries (`int`, *optional*, defaults to 150): @@ -148,6 +151,7 @@ def __init__( backbone_config: Optional[Dict] = None, backbone: Optional[str] = None, use_pretrained_backbone: bool = False, + use_timm_backbone: bool = False, ignore_value: int = 255, num_queries: int = 150, no_object_weight: int = 0.1, @@ -218,6 +222,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.ignore_value = ignore_value self.num_queries = num_queries self.no_object_weight = no_object_weight diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 23d8b05c117b96..ef4f2c840bdd23 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -47,6 +47,9 @@ class TvpConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. 
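The backbone tests extended earlier in this series rely on a simple trick: build the same model twice and see which parameters agree. Pretrained backbone weights match across instantiations, while randomly initialised layers almost surely do not. A sketch of that helper in isolation (generic `nn.Module`s, not the real test class):

```python
import torch.nn as nn


def equal_and_not_equal_weights(model_0: nn.Module, model_1: nn.Module):
    """Split parameter names by whether two independently built models agree on their values."""
    equal, not_equal = [], []
    for (name_0, param_0), (name_1, param_1) in zip(model_0.named_parameters(), model_1.named_parameters()):
        assert name_0 == name_1, "models are expected to share an architecture"
        if bool((param_0 == param_1).all()):
            equal.append(name_0)  # identical values: loaded from the same checkpoint (or deterministic init)
        else:
            not_equal.append(name_0)  # differing values: randomly initialised
    return equal, not_equal
```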
duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -144,6 +147,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.distance_loss_weight = distance_loss_weight self.duration_loss_weight = duration_loss_weight self.visual_prompter_type = visual_prompter_type diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 2d54c821b8c8d5..7c5813a5f2b5e4 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -42,6 +42,9 @@ class UperNetConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. initializer_range (`float`, *optional*, defaults to 0.02): @@ -83,6 +86,7 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, hidden_size=512, initializer_range=0.02, pool_scales=[1, 2, 3, 6], @@ -116,6 +120,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.hidden_size = hidden_size self.initializer_range = initializer_range self.pool_scales = pool_scales diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 71cd4b2b3da247..45d2e072286415 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -46,6 +46,9 @@ class ViTHybridConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. 
num_hidden_layers (`int`, *optional*, defaults to 12): @@ -98,6 +101,7 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -148,6 +152,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index b90a0f29e74798..a87aaeb5340aa5 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -47,6 +47,9 @@ class VitMatteConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. + use_timm_backbone (`bool`, *optional*, `False`): + Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers + library. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -80,6 +83,7 @@ def __init__( backbone_config: PretrainedConfig = None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, hidden_size: int = 384, batch_norm_eps: float = 1e-5, initializer_range: float = 0.02, @@ -109,6 +113,7 @@ def __init__( self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.use_timm_backbone = use_timm_backbone self.batch_norm_eps = batch_norm_eps self.hidden_size = hidden_size self.initializer_range = initializer_range diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index f97fd2ef31f5df..a7fb1f0380d9bd 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -234,9 +234,11 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s "out_features", "out_indices", "sampling_rate", + # backbone related arguments passed to load_backbone "use_pretrained_backbone", "backbone", "backbone_config", + "use_timm_backbone" ] attributes_used_in_generation = ["encoder_no_repeat_ngram_size"] From 737fd0ca3d0a26a880046d130263a3e9d3a0ddac Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 18:38:58 +0000 Subject: [PATCH 18/32] Add backbone_kwargs to config --- .../conditional_detr/configuration_conditional_detr.py | 4 +++- .../deformable_detr/configuration_deformable_detr.py | 4 +++- src/transformers/models/deta/configuration_deta.py | 4 ++++ src/transformers/models/detr/configuration_detr.py | 4 +++- src/transformers/models/dpt/configuration_dpt.py | 4 ++++ .../models/mask2former/configuration_mask2former.py | 10 +++++++--- .../models/maskformer/configuration_maskformer.py | 4 ++++ .../models/oneformer/configuration_oneformer.py | 4 ++++ .../configuration_table_transformer.py | 4 +++- src/transformers/models/tvp/configuration_tvp.py | 5 +++++ .../models/upernet/configuration_upernet.py | 4 ++++ .../models/vit_hybrid/configuration_vit_hybrid.py | 4 ++++ 
.../models/vitmatte/configuration_vitmatte.py | 4 ++++ src/transformers/utils/backbone_utils.py | 10 +++++----- 14 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 06cb33bab74645..1630f75bc2406e 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -95,6 +95,8 @@ class ConditionalDetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -225,7 +227,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 994b7b77e5c819..745cdcca6e5da8 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -88,6 +88,8 @@ class DeformableDetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -240,7 +242,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.dilation = dilation # deformable attributes self.num_feature_levels = num_feature_levels diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index 4dccce1c82d6e1..a30e496ee50612 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -48,6 +48,8 @@ class DetaConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. 
num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. @@ -152,6 +154,7 @@ def __init__( backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, num_queries=900, max_position_embeddings=2048, encoder_layers=6, @@ -214,6 +217,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.num_queries = num_queries self.max_position_embeddings = max_position_embeddings self.d_model = d_model diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index b6834cd940e041..b72d372e117c48 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -96,6 +96,8 @@ class DetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -225,7 +227,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index 4d6ac65767c8ce..0292cbc24d9467 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -118,6 +118,8 @@ class DPTConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. 
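At this stage `backbone_kwargs` is only stored on the config (defaulting to an empty dict, as the `__init__` changes above show); how it is consumed is wired up elsewhere in the series. A config-level sketch with hypothetical values (the timm model name is illustrative):

```python
from transformers import DetrConfig

config = DetrConfig(
    use_timm_backbone=True,
    use_pretrained_backbone=False,
    backbone="resnet50",  # timm model name
    backbone_kwargs={"out_indices": (1, 2, 3, 4)},
)
print(config.backbone_kwargs)  # {'out_indices': (1, 2, 3, 4)}
```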
Example: @@ -171,6 +173,7 @@ def __init__( backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, **kwargs, ): super().__init__(**kwargs) @@ -231,6 +234,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.num_hidden_layers = None if use_autobackbone else num_hidden_layers self.num_attention_heads = None if use_autobackbone else num_attention_heads self.intermediate_size = None if use_autobackbone else intermediate_size diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index a6255df0e1a868..6de55ade8e55e2 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -50,6 +50,8 @@ class Mask2FormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. mask_feature_size (`int`, *optional*, defaults to 256): @@ -157,9 +159,10 @@ def __init__( use_auxiliary_loss: bool = True, feature_strides: List[int] = [4, 8, 16, 32], output_auxiliary_logits: bool = None, - backbone=None, - use_pretrained_backbone=False, - use_timm_backbone=False, + backbone: Optional[str] = None, + use_pretrained_backbone: bool = False, + use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, **kwargs, ): if use_pretrained_backbone: @@ -230,6 +233,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} super().__init__(**kwargs) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 85034876f744d9..535b0fa9eef013 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -60,6 +60,8 @@ class MaskFormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. 
@@ -120,6 +122,7 @@ def __init__( backbone: Optional[str] = None, use_pretrained_backbone: bool = False, use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, **kwargs, ): if use_pretrained_backbone: @@ -192,6 +195,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} super().__init__(**kwargs) @classmethod diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index a6e5cd2a9d1c2a..baf617b4e87da8 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -49,6 +49,8 @@ class OneFormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. num_queries (`int`, *optional*, defaults to 150): @@ -152,6 +154,7 @@ def __init__( backbone: Optional[str] = None, use_pretrained_backbone: bool = False, use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, ignore_value: int = 255, num_queries: int = 150, no_object_weight: int = 0.1, @@ -223,6 +226,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.ignore_value = ignore_value self.num_queries = num_queries self.no_object_weight = no_object_weight diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 613863206bd416..3f2f0cadb81581 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -95,6 +95,8 @@ class TableTransformerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. 
@@ -225,7 +227,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index ef4f2c840bdd23..79a406e75a84b5 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -50,6 +50,8 @@ class TvpConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -104,6 +106,8 @@ def __init__( backbone_config=None, backbone=None, use_pretrained_backbone=False, + use_timm_backbone=False, + backbone_kwargs=None, distance_loss_weight=1.0, duration_loss_weight=0.1, visual_prompter_type="framepad", @@ -148,6 +152,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.distance_loss_weight = distance_loss_weight self.duration_loss_weight = duration_loss_weight self.visual_prompter_type = visual_prompter_type diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 7c5813a5f2b5e4..16131c68a8e1d8 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -45,6 +45,8 @@ class UperNetConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. 
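For the models above that previously only took a transformers `backbone_config`, the new flags make a timm backbone selectable at the config level too. A small sketch (hypothetical usage; whether a given head model supports a timm backbone end to end is beyond this excerpt):

```python
from transformers import UperNetConfig

config = UperNetConfig(
    backbone="resnet18",            # treated as a timm model name because of the flag below
    use_timm_backbone=True,
    use_pretrained_backbone=False,  # pretrained backbone weights are still rejected for UperNet here
    backbone_kwargs={"out_indices": (1, 2, 3, 4)},
)
```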
initializer_range (`float`, *optional*, defaults to 0.02): @@ -87,6 +89,7 @@ def __init__( backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size=512, initializer_range=0.02, pool_scales=[1, 2, 3, 6], @@ -121,6 +124,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.hidden_size = hidden_size self.initializer_range = initializer_range self.pool_scales = pool_scales diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 45d2e072286415..a3240bace4d492 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -49,6 +49,8 @@ class ViTHybridConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. num_hidden_layers (`int`, *optional*, defaults to 12): @@ -102,6 +104,7 @@ def __init__( backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -153,6 +156,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index a87aaeb5340aa5..8788e689b674a6 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -50,6 +50,8 @@ class VitMatteConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. 
batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -84,6 +86,7 @@ def __init__( backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size: int = 384, batch_norm_eps: float = 1e-5, initializer_range: float = 0.02, @@ -114,6 +117,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} self.batch_norm_eps = batch_norm_eps self.hidden_size = hidden_size self.initializer_range = initializer_range diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 22c35c3f9b6e06..6e4d68cbf41c82 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -288,7 +288,7 @@ def to_dict(self): return output -def load_backbone(config): +def load_backbone(config, **kwargs): """ Loads the backbone model from a config object. @@ -317,7 +317,7 @@ def load_backbone(config): and backbone_checkpoint is None and backbone_checkpoint is None ): - return AutoBackbone.from_config(config=config) + return AutoBackbone.from_config(config=config, **kwargs) # config from the parent model that has a backbone if use_timm_backbone: @@ -326,16 +326,16 @@ def load_backbone(config): # Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone # to determine whether to load the pretrained weights. backbone = AutoBackbone.from_pretrained( - backbone_checkpoint, use_timm_backbone=use_timm_backbone, use_pretrained_backbone=use_pretrained_backbone + backbone_checkpoint, use_timm_backbone=use_timm_backbone, use_pretrained_backbone=use_pretrained_backbone, **kwargs ) elif use_pretrained_backbone: if backbone_checkpoint is None: raise ValueError("config.backbone must be set if use_pretrained_backbone is True") - backbone = AutoBackbone.from_pretrained(backbone_checkpoint) + backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **kwargs) else: if backbone_config is None and backbone_checkpoint is None: raise ValueError("Either config.backbone_config or config.backbone must be set") if backbone_config is None: - backbone_config = AutoConfig.from_pretrained(backbone_checkpoint) + backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **kwargs) backbone = AutoBackbone.from_config(config=backbone_config) return backbone From 62d79f7634f6069606a286e6880a7edf53d74a1f Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 20:03:15 +0000 Subject: [PATCH 19/32] Pass kwargs to constructors --- src/transformers/models/dpt/configuration_dpt.py | 2 +- .../models/oneformer/configuration_oneformer.py | 2 +- src/transformers/models/tvp/configuration_tvp.py | 2 +- .../models/vit_hybrid/configuration_vit_hybrid.py | 2 +- .../models/vitmatte/configuration_vitmatte.py | 2 +- src/transformers/utils/backbone_utils.py | 15 +++++++++++---- utils/check_config_attributes.py | 3 ++- 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index 0292cbc24d9467..fbab5a5b79745a 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -115,7 +115,7 @@ class DPTConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with 
random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): + use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index baf617b4e87da8..f04a90bf81bebf 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -46,7 +46,7 @@ class OneFormerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): + use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 79a406e75a84b5..872b59f539168b 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -47,7 +47,7 @@ class TvpConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): + use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index a3240bace4d492..fb8f52338c6a72 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -46,7 +46,7 @@ class ViTHybridConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. - use_timm_backbone (`bool`, *optional*, `False`): + use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 8788e689b674a6..79aecab59b49d8 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -47,7 +47,7 @@ class VitMatteConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `False`): Whether to use pretrained weights for the backbone. 
- use_timm_backbone (`bool`, *optional*, `False`): + use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 6e4d68cbf41c82..d5823b52b479e7 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -304,6 +304,10 @@ def load_backbone(config, **kwargs): use_timm_backbone = getattr(config, "use_timm_backbone", None) use_pretrained_backbone = getattr(config, "use_pretrained_backbone", None) backbone_checkpoint = getattr(config, "backbone", None) + backbone_kwargs = getattr(config, "backbone_kwargs", None) + + backbone_kwargs = {} if backbone_kwargs is None else backbone_kwargs + backbone_kwargs.update(kwargs) # If there is a backbone_config and a backbone checkpoint, and use_pretrained_backbone=False then the desired # behaviour is ill-defined: do you want to load from the checkpoint's config or the backbone_config? @@ -317,7 +321,7 @@ def load_backbone(config, **kwargs): and backbone_checkpoint is None and backbone_checkpoint is None ): - return AutoBackbone.from_config(config=config, **kwargs) + return AutoBackbone.from_config(config=config, **backbone_kwargs) # config from the parent model that has a backbone if use_timm_backbone: @@ -326,16 +330,19 @@ def load_backbone(config, **kwargs): # Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone # to determine whether to load the pretrained weights. backbone = AutoBackbone.from_pretrained( - backbone_checkpoint, use_timm_backbone=use_timm_backbone, use_pretrained_backbone=use_pretrained_backbone, **kwargs + backbone_checkpoint, + use_timm_backbone=use_timm_backbone, + use_pretrained_backbone=use_pretrained_backbone, + **backbone_kwargs, ) elif use_pretrained_backbone: if backbone_checkpoint is None: raise ValueError("config.backbone must be set if use_pretrained_backbone is True") - backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **kwargs) + backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **backbone_kwargs) else: if backbone_config is None and backbone_checkpoint is None: raise ValueError("Either config.backbone_config or config.backbone must be set") if backbone_config is None: - backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **kwargs) + backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **backbone_kwargs) backbone = AutoBackbone.from_config(config=backbone_config) return backbone diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index a7fb1f0380d9bd..f631c59b75d40e 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -238,7 +238,8 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s "use_pretrained_backbone", "backbone", "backbone_config", - "use_timm_backbone" + "use_timm_backbone", + "backbone_kwargs", ] attributes_used_in_generation = ["encoder_no_repeat_ngram_size"] From 19fd92d51e6f529b3f93faf016d43749224e675b Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 4 Jan 2024 20:26:43 +0000 Subject: [PATCH 20/32] Draft --- .../configuration_conditional_detr.py | 37 ++++++++--- .../modeling_conditional_detr.py | 27 +------- .../configuration_deformable_detr.py | 15 ++++- 
.../modeling_deformable_detr.py | 26 +------- .../models/deta/configuration_deta.py | 5 +- .../models/detr/configuration_detr.py | 37 +++++++++-- src/transformers/models/detr/modeling_detr.py | 27 +------- .../models/dpt/configuration_dpt.py | 14 +++-- .../mask2former/configuration_mask2former.py | 5 +- .../maskformer/configuration_maskformer.py | 8 ++- .../oneformer/configuration_oneformer.py | 8 ++- .../configuration_table_transformer.py | 37 +++++++++-- .../modeling_table_transformer.py | 27 +------- .../timm_backbone/modeling_timm_backbone.py | 7 ++- .../models/tvp/configuration_tvp.py | 5 +- .../models/upernet/configuration_upernet.py | 5 +- .../vit_hybrid/configuration_vit_hybrid.py | 5 +- .../models/vitmatte/configuration_vitmatte.py | 5 +- src/transformers/utils/backbone_utils.py | 6 +- .../test_modeling_deformable_detr.py | 1 - tests/models/detr/test_modeling_detr.py | 3 + tests/utils/test_backbone_utils.py | 61 ++++++++++++++++++- utils/check_config_attributes.py | 4 ++ 23 files changed, 230 insertions(+), 145 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 1630f75bc2406e..ef57bd77ed1759 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -96,7 +96,8 @@ class ConditionalDetrConfig(PretrainedConfig): use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -191,10 +192,14 @@ def __init__( if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") - if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: - raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - - if not use_timm_backbone: + if use_timm_backbone and backbone_kwargs is None: + backbone_kwargs = {} + if dilation: + backbone_kwargs["output_stride"] = 16 + backbone_kwargs["out_indices"] = [1, 2, 3, 4] + backbone_kwargs["in_chans"] = num_channels + # Backwards compatibility + elif not use_timm_backbone and backbone in (None, "resnet50"): if backbone_config is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `ResNet` backbone.") backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"]) @@ -205,7 +210,7 @@ def __init__( self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self.num_channels = num_channels + self._num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -227,8 +232,8 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} - self.dilation = dilation + self.backbone_kwargs = backbone_kwargs + self._dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -242,6 +247,16 @@ def __init__( self.focal_alpha = focal_alpha super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) + @property + def num_channels(self): + logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") + return self._num_channels + + @property + def dilation(self): + logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") + return self._dilation + @property def num_attention_heads(self) -> int: return self.encoder_attention_heads @@ -250,6 +265,12 @@ def num_attention_heads(self) -> int: def hidden_size(self) -> int: return self.d_model + def to_dict(self): + output = super().to_dict() + output.pop("_num_channels", None) + output.pop("_dilation", None) + return output + class ConditionalDetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index d8ff371fad77d1..b926b6df482740 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -32,7 +32,6 @@ add_start_docstrings_to_model_forward, is_accelerate_available, is_scipy_available, - is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -49,9 +48,6 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment -if is_timm_available(): - from timm import create_model - if is_vision_available(): from ...image_transforms import center_to_corners_format @@ -351,30 +347,13 @@ def __init__(self, config): super().__init__() self.config = config - - if config.use_timm_backbone: - requires_backends(self, ["timm"]) - kwargs = {} - if config.dilation: - kwargs["output_stride"] = 16 - backbone = create_model( - config.backbone, - pretrained=config.use_pretrained_backbone, - features_only=True, - out_indices=(1, 2, 3, 4), - in_chans=config.num_channels, - **kwargs, - ) - else: - backbone = load_backbone(config) + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = ( - self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels - ) + self.intermediate_channel_sizes = self.model.channels backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -388,7 +367,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to 
get list of feature maps - features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps + features = self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 745cdcca6e5da8..41642ec8783c94 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -89,7 +89,8 @@ class DeformableDetrConfig(PretrainedConfig): use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -211,7 +212,14 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - if not use_timm_backbone: + if use_timm_backbone and backbone_kwargs is None: + backbone_kwargs = {} + if dilation: + backbone_kwargs["output_stride"] = 16 + backbone_kwargs["out_indices"] = [2, 3, 4] if num_feature_levels > 1 else [4] + backbone_kwargs["in_chans"] = num_channels + # Backwards compatibility + elif not use_timm_backbone and backbone in (None, "resnet50"): if backbone_config is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `ResNet` backbone.") backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"]) @@ -219,6 +227,7 @@ def __init__( backbone_model_type = backbone_config.get("model_type") config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config self.num_channels = num_channels @@ -242,7 +251,7 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.dilation = dilation # deformable attributes self.num_feature_levels = num_feature_levels diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index c0ac7cffc7ab44..e5dfd5906a9972 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -35,7 +35,6 @@ add_start_docstrings, add_start_docstrings_to_model_forward, is_scipy_available, - is_timm_available, is_torch_cuda_available, is_vision_available, replace_return_docstrings, @@ -144,8 +143,6 @@ def backward(context, grad_output): if is_scipy_available(): from scipy.optimize import linear_sum_assignment -if is_timm_available(): - from timm import create_model logger = logging.get_logger(__name__) @@ -419,30 +416,13 @@ def __init__(self, config): super().__init__() self.config = config - - if config.use_timm_backbone: - requires_backends(self, ["timm"]) - kwargs = {} - if config.dilation: - kwargs["output_stride"] = 16 - backbone = create_model( - config.backbone, - pretrained=config.use_pretrained_backbone, - features_only=True, - out_indices=(2, 3, 4) if config.num_feature_levels > 1 else (4,), - in_chans=config.num_channels, - **kwargs, - ) - else: - backbone = load_backbone(config) + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = ( - self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels - ) + self.intermediate_channel_sizes = self.model.channels backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -457,7 +437,7 @@ def __init__(self, config): # Copied from transformers.models.detr.modeling_detr.DetrConvEncoder.forward with Detr->DeformableDetr def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps + features = self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index a30e496ee50612..1604bc56e6396d 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -49,7 +49,8 @@ class DetaConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. 
backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. @@ -217,7 +218,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.num_queries = num_queries self.max_position_embeddings = max_position_embeddings self.d_model = d_model diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index b72d372e117c48..9b3bd8b3bec4ef 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -97,7 +97,8 @@ class DetrConfig(PretrainedConfig): use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -192,7 +193,14 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - if not use_timm_backbone: + if use_timm_backbone and backbone_kwargs is None: + backbone_kwargs = {} + if dilation: + backbone_kwargs["output_stride"] = 16 + backbone_kwargs["out_indices"] = [1, 2, 3, 4] + backbone_kwargs["in_chans"] = num_channels + # Backwards compatibility + elif not use_timm_backbone and backbone in (None, "resnet50"): if backbone_config is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `ResNet` backbone.") backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"]) @@ -200,12 +208,13 @@ def __init__( backbone_model_type = backbone_config.get("model_type") config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + backbone = None # set timm attributes to None - dilation, backbone, use_pretrained_backbone = None, None, None + dilation = None self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self.num_channels = num_channels + self._num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -227,8 +236,8 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} - self.dilation = dilation + self.backbone_kwargs = backbone_kwargs + self._dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -241,6 +250,16 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) + @property + def num_channels(self): + logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") + return self._num_channels + + @property + def dilation(self): + logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") + return self._dilation + @property def num_attention_heads(self) -> int: return self.encoder_attention_heads @@ -261,6 +280,12 @@ def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): """ return cls(backbone_config=backbone_config, **kwargs) + def to_dict(self): + output = super().to_dict() + output.pop("_num_channels", None) + output.pop("_dilation", None) + return output + class DetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index d7fcdfc5bc7e83..b23bda7e8fa093 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -32,7 +32,6 @@ add_start_docstrings_to_model_forward, is_accelerate_available, is_scipy_available, - is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -49,9 +48,6 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment -if is_timm_available(): - from timm import create_model - if is_vision_available(): from transformers.image_transforms import center_to_corners_format @@ -344,30 +340,13 @@ def __init__(self, config): super().__init__() self.config = config - - if config.use_timm_backbone: - requires_backends(self, ["timm"]) - kwargs = {} - if config.dilation: - kwargs["output_stride"] = 16 - backbone = create_model( - config.backbone, - pretrained=config.use_pretrained_backbone, - features_only=True, - out_indices=(1, 2, 3, 4), - in_chans=config.num_channels, - **kwargs, - ) - else: - backbone = load_backbone(config) + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = ( - self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels - ) + self.intermediate_channel_sizes = self.model.channels backbone_model_type = config.backbone if 
config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -381,7 +360,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps + features = self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index fbab5a5b79745a..9bdc8d1ef0affb 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -119,7 +119,8 @@ class DPTConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. Example: @@ -184,9 +185,6 @@ def __init__( if use_pretrained_backbone: raise ValueError("Pretrained backbones are not supported yet.") - if backbone_config is not None and backbone is not None: - raise ValueError("You can't specify both `backbone` and `backbone_config`.") - use_autobackbone = False if self.is_hybrid: if backbone_config is None and backbone is None: @@ -231,10 +229,16 @@ def __init__( self.backbone_featmap_shape = None self.neck_ignore_stages = [] + if use_autobackbone and backbone_config is not None and backbone is not None: + raise ValueError("You can't specify both `backbone` and `backbone_config`.") + + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.num_hidden_layers = None if use_autobackbone else num_hidden_layers self.num_attention_heads = None if use_autobackbone else num_attention_heads self.intermediate_size = None if use_autobackbone else intermediate_size diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 6de55ade8e55e2..f0d13b8e030ed1 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -51,7 +51,8 @@ class Mask2FormerConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. 
mask_feature_size (`int`, *optional*, defaults to 256): @@ -233,7 +234,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 535b0fa9eef013..653350ca056dda 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -61,7 +61,8 @@ class MaskFormerConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. @@ -131,6 +132,9 @@ def __init__( if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if backbone_config is None and backbone is None: # fall back to https://huggingface.co/microsoft/swin-base-patch4-window12-384-in22k backbone_config = SwinConfig( @@ -195,7 +199,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) @classmethod diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index f04a90bf81bebf..1cbd2ab7dbc18f 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -50,7 +50,8 @@ class OneFormerConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. 
num_queries (`int`, *optional*, defaults to 150): @@ -222,11 +223,14 @@ def __init__( config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.ignore_value = ignore_value self.num_queries = num_queries self.no_object_weight = no_object_weight diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 3f2f0cadb81581..c6777290ac02d0 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -96,7 +96,8 @@ class TableTransformerConfig(PretrainedConfig): use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -192,7 +193,14 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - if not use_timm_backbone: + if use_timm_backbone and backbone_kwargs is None: + backbone_kwargs = {} + if dilation: + backbone_kwargs["output_stride"] = 16 + backbone_kwargs["out_indices"] = [1, 2, 3, 4] + backbone_kwargs["in_chans"] = num_channels + # Backwards compatibility + elif not use_timm_backbone and backbone in (None, "resnet50"): if backbone_config is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `ResNet` backbone.") backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"]) @@ -200,12 +208,13 @@ def __init__( backbone_model_type = backbone_config.get("model_type") config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + backbone = None # set timm attributes to None - dilation, backbone, use_pretrained_backbone = None, None, None + dilation = None self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self.num_channels = num_channels + self._num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -227,8 +236,8 @@ def __init__( self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} - self.dilation = dilation + self.backbone_kwargs = backbone_kwargs + self._dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -241,6 +250,16 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) + @property + def num_channels(self): + logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") + return self._num_channels + + @property + def dilation(self): + logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") + return self._dilation + @property def num_attention_heads(self) -> int: return self.encoder_attention_heads @@ -249,6 +268,12 @@ def num_attention_heads(self) -> int: def hidden_size(self) -> int: return self.d_model + def to_dict(self): + output = super().to_dict() + output.pop("_num_channels", None) + output.pop("_dilation", None) + return output + # Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig class TableTransformerOnnxConfig(OnnxConfig): diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index 8e577a65a5fe00..bf622be57a6b92 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -32,7 +32,6 @@ add_start_docstrings_to_model_forward, is_accelerate_available, is_scipy_available, - is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -45,9 +44,6 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment -if is_timm_available(): - from timm import create_model - if is_vision_available(): from transformers.image_transforms import center_to_corners_format @@ -278,30 +274,13 @@ def __init__(self, config): super().__init__() self.config = config - - if config.use_timm_backbone: - requires_backends(self, ["timm"]) - kwargs = {} - if config.dilation: - kwargs["output_stride"] = 16 - backbone = create_model( - config.backbone, - pretrained=config.use_pretrained_backbone, - features_only=True, - out_indices=(1, 2, 3, 4), - in_chans=config.num_channels, - **kwargs, - ) - else: - backbone = load_backbone(config) + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = ( - self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels - ) + 
self.intermediate_channel_sizes = self.model.channels backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -315,7 +294,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps + features = self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/timm_backbone/modeling_timm_backbone.py b/src/transformers/models/timm_backbone/modeling_timm_backbone.py index 0c6fe67b75731f..e8e0b28e042d6f 100644 --- a/src/transformers/models/timm_backbone/modeling_timm_backbone.py +++ b/src/transformers/models/timm_backbone/modeling_timm_backbone.py @@ -63,12 +63,13 @@ def __init__(self, config, **kwargs): # We just take the final layer by default. This matches the default for the transformers models. out_indices = config.out_indices if getattr(config, "out_indices", None) is not None else (-1,) + in_chans = kwargs.pop("in_chans", config.num_channels) self._backbone = timm.create_model( config.backbone, pretrained=pretrained, # This is currently not possible for transformer architectures. features_only=config.features_only, - in_chans=config.num_channels, + in_chans=in_chans, out_indices=out_indices, **kwargs, ) @@ -79,7 +80,9 @@ def __init__(self, config, **kwargs): # These are used to control the output of the model when called. If output_hidden_states is True, then # return_layers is modified to include all layers. - self._return_layers = self._backbone.return_layers + self._return_layers = { + layer["module"]: str(layer["index"]) for layer in self._backbone.feature_info.get_dicts() + } self._all_layers = {layer["module"]: str(i) for i, layer in enumerate(self._backbone.feature_info.info)} super()._init_backbone(config) diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 872b59f539168b..85b7ac6a41cbcc 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -51,7 +51,8 @@ class TvpConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. 
duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -152,7 +153,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.distance_loss_weight = distance_loss_weight self.duration_loss_weight = duration_loss_weight self.visual_prompter_type = visual_prompter_type diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 16131c68a8e1d8..609818c80d17b7 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -46,7 +46,8 @@ class UperNetConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. initializer_range (`float`, *optional*, defaults to 0.02): @@ -124,7 +125,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.initializer_range = initializer_range self.pool_scales = pool_scales diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index fb8f52338c6a72..8a8a808ec60d05 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -50,7 +50,8 @@ class ViTHybridConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. 
num_hidden_layers (`int`, *optional*, defaults to 12): @@ -156,7 +157,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 79aecab59b49d8..275640d1d079a1 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -51,7 +51,8 @@ class VitMatteConfig(PretrainedConfig): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. backbone_kwargs (`dict`, *optional*): - Keyword arguments to be passed to the backbone constructor e.g. `{'out_indices': (0, 1, 2, 3)}`. + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -117,7 +118,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone - self.backbone_kwargs = backbone_kwargs if backbone_kwargs is not None else {} + self.backbone_kwargs = backbone_kwargs self.batch_norm_eps = batch_norm_eps self.hidden_size = hidden_size self.initializer_range = initializer_range diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index d5823b52b479e7..14fcfe4a50a2d2 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -288,7 +288,7 @@ def to_dict(self): return output -def load_backbone(config, **kwargs): +def load_backbone(config): """ Loads the backbone model from a config object. @@ -307,7 +307,9 @@ def load_backbone(config, **kwargs): backbone_kwargs = getattr(config, "backbone_kwargs", None) backbone_kwargs = {} if backbone_kwargs is None else backbone_kwargs - backbone_kwargs.update(kwargs) + + if backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") # If there is a backbone_config and a backbone checkpoint, and use_pretrained_backbone=False then the desired # behaviour is ill-defined: do you want to load from the checkpoint's config or the backbone_config? 
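For reference, a minimal usage sketch of what `load_backbone` resolves to after this patch (it mirrors the new `test_load_backbone_*` tests added below; assumes `torch` and `timm` are installed and that the `microsoft/resnet-18` and timm `resnet18` checkpoints can be downloaded from the Hub):

from transformers import MaskFormerConfig
from transformers.utils.backbone_utils import load_backbone

# backbone_kwargs is forwarded to AutoBackbone when loading from a checkpoint name.
config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_kwargs={"out_indices": (0, 2)})
backbone = load_backbone(config)  # ResNetBackbone with out_indices (0, 2)

# With use_timm_backbone=True, the same kwargs are forwarded to the timm backbone instead.
timm_config = MaskFormerConfig(backbone="resnet18", use_timm_backbone=True, backbone_kwargs={"out_indices": (0, 1)})
timm_backbone = load_backbone(timm_config)  # TimmBackbone with out_indices (0, 1)

# Passing backbone_kwargs together with an explicit backbone_config raises a ValueError.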
diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 8487b5dc694d54..7a83c4f1ed80a8 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -152,7 +152,6 @@ def get_config(self): use_timm_backbone=False, backbone=None, backbone_config=resnet_config, - backbone=None, use_pretrained_backbone=False, ) diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index 59b071e031aa8a..cce951561f3352 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -444,6 +444,9 @@ def test_different_timm_backbone(self): # let's pick a random timm backbone config.backbone = "tf_mobilenetv3_small_075" + config.backbone_config = None + config.use_timm_backbone = True + config.backbone_kwargs = {"out_indices": [2, 3, 4]} for model_class in self.all_model_classes: model = model_class(config) diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 0c3ff4866e8379..244f62950f048f 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -16,7 +16,7 @@ import pytest -from transformers import DetrConfig, MaskFormerConfig +from transformers import DetrConfig, MaskFormerConfig, ResNetBackbone, ResNetConfig, TimmBackbone from transformers.testing_utils import require_torch, slow from transformers.utils.backbone_utils import ( BackboneMixin, @@ -137,6 +137,65 @@ def test_backbone_mixin(self): self.assertEqual(backbone.out_features, ["a", "c"]) self.assertEqual(backbone.out_indices, [-3, -1]) + @slow + @require_torch + def test_load_backbone_from_config(self): + """ + Test that load_backbone correctly loads a backbone from a backbone config. + """ + config = MaskFormerConfig(backbone_config=ResNetConfig(out_indices=(0, 2))) + backbone = load_backbone(config) + self.assertEqual(backbone.out_features, ["stem", "stage2"]) + self.assertEqual(backbone.out_indices, (0, 2)) + self.assertIsInstance(backbone, ResNetBackbone) + + @slow + @require_torch + def test_load_backbone_from_checkpoint(self): + """ + Test that load_backbone correctly loads a backbone from a checkpoint. + """ + config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_config=None) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, [4]) + self.assertEqual(backbone.out_features, ["stage4"]) + self.assertIsInstance(backbone, ResNetBackbone) + + config = MaskFormerConfig( + backbone="resnet18", + use_timm_backbone=True, + ) + backbone = load_backbone(config) + # We can't know ahead of time the exact output features and indices, or the layer names before + # creating the timm model, so it defalts to the last layer (-1,) and has a different layer name + self.assertEqual(backbone.out_indices, (-1,)) + self.assertEqual(backbone.out_features, ["layer4"]) + self.assertIsInstance(backbone, TimmBackbone) + + @slow + @require_torch + def test_load_backbone_backbone_kwargs(self): + """ + Test that load_backbone correctly configures the loaded backbone with the provided kwargs. 
+ """ + config = MaskFormerConfig(backbone="resnet18", use_timm_backbone=True, backbone_kwargs={"out_indices": (0, 1)}) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, (0, 1)) + self.assertIsInstance(backbone, TimmBackbone) + + config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_kwargs={"out_indices": (0, 2)}) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, (0, 2)) + self.assertIsInstance(backbone, ResNetBackbone) + + # Check can't be passed with a backone config + with pytest.raises(ValueError): + config = MaskFormerConfig( + backbone="microsoft/resnet-18", + backbone_config=ResNetConfig(out_indices=(0, 2)), + backbone_kwargs={"out_indices": (0, 1)}, + ) + @slow @require_torch def test_load_backbone_in_new_model(self): diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index f631c59b75d40e..84abb1386bd2cf 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -132,9 +132,13 @@ # TODO (ydshieh): Check the failing cases, try to fix them or move some cases to the above block once we are sure SPECIAL_CASES_TO_ALLOW.update( { + # Has no longer used attributes that will be removed after a deprecation cycle in v4.40 + "ConditionalDetrConfig": True, "CLIPSegConfig": True, "DeformableDetrConfig": True, "DetaConfig": True, + # Has no longer used attributes that will be removed after a deprecation cycle in v4.40 + "DetrConfig": True, "DinatConfig": True, "DonutSwinConfig": True, "EfficientFormerConfig": True, From f9cbc017f9edab177acd1a3bdbf974395d3cd32c Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Mon, 19 Feb 2024 15:54:44 +0000 Subject: [PATCH 21/32] Fix tests --- src/transformers/models/dpt/modeling_dpt.py | 2 +- .../models/conditional_detr/test_modeling_conditional_detr.py | 2 ++ tests/models/detr/test_modeling_detr.py | 1 - .../table_transformer/test_modeling_table_transformer.py | 4 +++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index a3899f86ff8457..ef6c8bb853abda 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -1075,7 +1075,7 @@ def __init__(self, config): super().__init__(config) self.backbone = None - if config.is_hybrid: + if config.is_hybrid or config.backbone_config is None: self.dpt = DPTModel(config, add_pooling_layer=False) else: self.backbone = load_backbone(config) diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index d1152ed8622b9c..bc53f826ed2125 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -444,7 +444,9 @@ def test_different_timm_backbone(self): # let's pick a random timm backbone config.backbone = "tf_mobilenetv3_small_075" + config.backbone_config = None config.use_timm_backbone = True + config.backbone_kwargs = {"out_indices": [2, 3, 4]} for model_class in self.all_model_classes: model = model_class(config) diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index cce951561f3352..e581ce1729668b 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -474,7 +474,6 @@ def test_greyscale_images(self): ) # let's set num_channels to 1 - 
config.num_channels = 1 config.backbone_config.num_channels = 1 for model_class in self.all_model_classes: diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 79da1d191063ab..8f3fd1d7e8f8b7 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -456,6 +456,9 @@ def test_different_timm_backbone(self): # let's pick a random timm backbone config.backbone = "tf_mobilenetv3_small_075" + config.backbone_config = None + config.use_timm_backbone = True + config.backbone_kwargs = {"out_indices": [2, 3, 4]} for model_class in self.all_model_classes: model = model_class(config) @@ -483,7 +486,6 @@ def test_greyscale_images(self): ) # let's set num_channels to 1 - config.num_channels = 1 config.backbone_config.num_channels = 1 for model_class in self.all_model_classes: From cffd51fc79ca6d62dcafd9202f45eeb2882bebd8 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 7 Mar 2024 20:50:40 +0000 Subject: [PATCH 22/32] Add back timm - weight naming --- .../configuration_conditional_detr.py | 14 ++------ .../modeling_conditional_detr.py | 32 ++++++++++++++++--- .../modeling_deformable_detr.py | 6 +++- .../models/detr/configuration_detr.py | 14 ++------ src/transformers/models/detr/modeling_detr.py | 32 +++++++++++++++++-- .../configuration_table_transformer.py | 14 ++------ .../modeling_table_transformer.py | 30 +++++++++++++++-- tests/utils/test_backbone_utils.py | 2 +- 8 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index ef57bd77ed1759..bad7092419506c 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -210,7 +210,7 @@ def __init__( self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self._num_channels = num_channels + self.num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -233,7 +233,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.backbone_kwargs = backbone_kwargs - self._dilation = dilation + self.dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -247,16 +247,6 @@ def __init__( self.focal_alpha = focal_alpha super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_channels(self): - logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") - return self._num_channels - - @property - def dilation(self): - logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") - return self._dilation - @property def num_attention_heads(self) -> int: return self.encoder_attention_heads diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index b926b6df482740..b634462437da72 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -32,6 +32,7 @@ add_start_docstrings_to_model_forward, 
is_accelerate_available, is_scipy_available, + is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -48,6 +49,9 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment +if is_timm_available(): + from timm.models import create_model + if is_vision_available(): from ...image_transforms import center_to_corners_format @@ -335,7 +339,7 @@ def replace_batch_norm(model): # Copied from transformers.models.detr.modeling_detr.DetrConvEncoder -class ConditionalDetrConvEncoder(nn.Module): +class DetrConvEncoder(nn.Module): """ Convolutional backbone, using either the AutoBackbone API or one from the timm library. @@ -347,13 +351,33 @@ def __init__(self, config): super().__init__() self.config = config - backbone = load_backbone(config) + + # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API + if config.use_timm_backbone: + requires_backends(self, ["timm"]) + kwargs = getattr(config, "backbone_kwargs", {}) + out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) + num_channels = kwargs.pop("in_chans", config.num_channels) + if config.dilation: + kwargs["output_stride"] = kwargs.get("output_stride", 16) + backbone = create_model( + config.backbone, + pretrained=config.use_pretrained_backbone, + features_only=True, + out_indices=out_indices, + in_chans=num_channels, + **kwargs, + ) + else: + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = self.model.channels + self.intermediate_channel_sizes = ( + self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels + ) backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -367,7 +391,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values).feature_maps + features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index e5dfd5906a9972..e146c1c8312da2 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -35,6 +35,7 @@ add_start_docstrings, add_start_docstrings_to_model_forward, is_scipy_available, + is_timm_available, is_torch_cuda_available, is_vision_available, replace_return_docstrings, @@ -91,6 +92,9 @@ def load_cuda_kernels(): from accelerate import PartialState from accelerate.utils import reduce +if is_timm_available(): + pass + class MultiScaleDeformableAttentionFunction(Function): @staticmethod @@ -437,7 +441,7 @@ def __init__(self, config): # Copied from transformers.models.detr.modeling_detr.DetrConvEncoder.forward with Detr->DeformableDetr def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values).feature_maps + features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git 
a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 9b3bd8b3bec4ef..2f3ce689e4ae0a 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -214,7 +214,7 @@ def __init__( self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self._num_channels = num_channels + self.num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -237,7 +237,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.backbone_kwargs = backbone_kwargs - self._dilation = dilation + self.dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -250,16 +250,6 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_channels(self): - logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") - return self._num_channels - - @property - def dilation(self): - logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") - return self._dilation - @property def num_attention_heads(self) -> int: return self.encoder_attention_heads diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index b23bda7e8fa093..8b5c47b45bda97 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -32,6 +32,7 @@ add_start_docstrings_to_model_forward, is_accelerate_available, is_scipy_available, + is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -48,6 +49,11 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment + +if is_timm_available(): + from timm import create_model + + if is_vision_available(): from transformers.image_transforms import center_to_corners_format @@ -340,13 +346,33 @@ def __init__(self, config): super().__init__() self.config = config - backbone = load_backbone(config) + + # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API + if config.use_timm_backbone: + requires_backends(self, ["timm"]) + kwargs = getattr(config, "backbone_kwargs", {}) + out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) + num_channels = kwargs.pop("in_chans", config.num_channels) + if config.dilation: + kwargs["output_stride"] = kwargs.get("output_stride", 16) + backbone = create_model( + config.backbone, + pretrained=config.use_pretrained_backbone, + features_only=True, + out_indices=out_indices, + in_chans=num_channels, + **kwargs, + ) + else: + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = self.model.channels + self.intermediate_channel_sizes = ( + self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels + ) backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: @@ -360,7 +386,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values).feature_maps + features = self.model(pixel_values) if 
self.config.use_timm_backbone else self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index c6777290ac02d0..c18bbea69ee4a1 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -214,7 +214,7 @@ def __init__( self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config - self._num_channels = num_channels + self.num_channels = num_channels self.num_queries = num_queries self.d_model = d_model self.encoder_ffn_dim = encoder_ffn_dim @@ -237,7 +237,7 @@ def __init__( self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.backbone_kwargs = backbone_kwargs - self._dilation = dilation + self.dilation = dilation # Hungarian matcher self.class_cost = class_cost self.bbox_cost = bbox_cost @@ -250,16 +250,6 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_channels(self): - logger.warn("The `num_channels` attribute is deprecated and will be removed in v4.40") - return self._num_channels - - @property - def dilation(self): - logger.warn("The `dilation` attribute is deprecated and will be removed in v4.40") - return self._dilation - @property def num_attention_heads(self) -> int: return self.encoder_attention_heads diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index bf622be57a6b92..32a656302eac2b 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -32,6 +32,7 @@ add_start_docstrings_to_model_forward, is_accelerate_available, is_scipy_available, + is_timm_available, is_vision_available, logging, replace_return_docstrings, @@ -44,6 +45,9 @@ if is_scipy_available(): from scipy.optimize import linear_sum_assignment +if is_timm_available(): + from timm.models import create_model + if is_vision_available(): from transformers.image_transforms import center_to_corners_format @@ -274,13 +278,33 @@ def __init__(self, config): super().__init__() self.config = config - backbone = load_backbone(config) + + # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API + if config.use_timm_backbone: + requires_backends(self, ["timm"]) + kwargs = getattr(config, "backbone_kwargs", {}) + out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) + num_channels = kwargs.pop("in_chans", config.num_channels) + if config.dilation: + kwargs["output_stride"] = kwargs.get("output_stride", 16) + backbone = create_model( + config.backbone, + pretrained=config.use_pretrained_backbone, + features_only=True, + out_indices=out_indices, + in_chans=num_channels, + **kwargs, + ) + else: + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = self.model.channels + self.intermediate_channel_sizes = ( + self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels + ) backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if 
"resnet" in backbone_model_type: @@ -294,7 +318,7 @@ def __init__(self, config): def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor): # send pixel_values through the model to get list of feature maps - features = self.model(pixel_values).feature_maps + features = self.model(pixel_values) if self.config.use_timm_backbone else self.model(pixel_values).feature_maps out = [] for feature_map in features: diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 244f62950f048f..cd9a5a29a8c071 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -167,7 +167,7 @@ def test_load_backbone_from_checkpoint(self): ) backbone = load_backbone(config) # We can't know ahead of time the exact output features and indices, or the layer names before - # creating the timm model, so it defalts to the last layer (-1,) and has a different layer name + # creating the timm model, so it defaults to the last layer (-1,) and has a different layer name self.assertEqual(backbone.out_indices, (-1,)) self.assertEqual(backbone.out_features, ["layer4"]) self.assertIsInstance(backbone, TimmBackbone) From 5bf5329d1605b78e427643336226164a845cc47c Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 13:14:49 +0000 Subject: [PATCH 23/32] More tidying up --- .../configuration_conditional_detr.py | 6 ----- .../modeling_conditional_detr.py | 6 ++--- .../modeling_deformable_detr.py | 25 ++++++++++++++++--- .../models/detr/configuration_detr.py | 6 ----- .../configuration_table_transformer.py | 6 ----- .../modeling_table_transformer.py | 2 +- utils/check_config_attributes.py | 4 --- 7 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index bad7092419506c..88c3a4d51a19d0 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -255,12 +255,6 @@ def num_attention_heads(self) -> int: def hidden_size(self) -> int: return self.d_model - def to_dict(self): - output = super().to_dict() - output.pop("_num_channels", None) - output.pop("_dilation", None) - return output - class ConditionalDetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index b634462437da72..6adc90a328cd35 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -50,7 +50,7 @@ from scipy.optimize import linear_sum_assignment if is_timm_available(): - from timm.models import create_model + from timm import create_model if is_vision_available(): from ...image_transforms import center_to_corners_format @@ -338,8 +338,8 @@ def replace_batch_norm(model): replace_batch_norm(module) -# Copied from transformers.models.detr.modeling_detr.DetrConvEncoder -class DetrConvEncoder(nn.Module): +# Copied from transformers.models.detr.modeling_detr.DetrConvEncoder with Detr >ConditionalDetr +class ConditionalDetrConvEncoder(nn.Module): """ Convolutional backbone, using either the AutoBackbone API or one from the timm library. 
diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index e146c1c8312da2..52eeb14ebbbb87 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -93,7 +93,7 @@ def load_cuda_kernels(): from accelerate.utils import reduce if is_timm_available(): - pass + from timm import create_model class MultiScaleDeformableAttentionFunction(Function): @@ -420,13 +420,32 @@ def __init__(self, config): super().__init__() self.config = config - backbone = load_backbone(config) + + if config.use_timm_backbone: + requires_backends(self, ["timm"]) + kwargs = getattr(config, "backbone_kwargs", {}) + out_indices = kwargs.pop("out_indices", (2, 3, 4) if config.num_feature_levels > 1 else (4,)) + num_channels = kwargs.pop("in_chans", config.num_channels) + if config.dilation: + kwargs["output_stride"] = kwargs.get("output_stride", 16) + backbone = create_model( + config.backbone, + pretrained=config.use_pretrained_backbone, + features_only=True, + out_indices=out_indices, + in_chans=in_chans, + **kwargs, + ) + else: + backbone = load_backbone(config) # replace batch norm by frozen batch norm with torch.no_grad(): replace_batch_norm(backbone) self.model = backbone - self.intermediate_channel_sizes = self.model.channels + self.intermediate_channel_sizes = ( + self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels + ) backbone_model_type = config.backbone if config.use_timm_backbone else config.backbone_config.model_type if "resnet" in backbone_model_type: diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 2f3ce689e4ae0a..c5dafa8f9b7649 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -270,12 +270,6 @@ def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): """ return cls(backbone_config=backbone_config, **kwargs) - def to_dict(self): - output = super().to_dict() - output.pop("_num_channels", None) - output.pop("_dilation", None) - return output - class DetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index c18bbea69ee4a1..c7f14b9d202b3b 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -258,12 +258,6 @@ def num_attention_heads(self) -> int: def hidden_size(self) -> int: return self.d_model - def to_dict(self): - output = super().to_dict() - output.pop("_num_channels", None) - output.pop("_dilation", None) - return output - # Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig class TableTransformerOnnxConfig(OnnxConfig): diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index 32a656302eac2b..f06de5d8d2fbd8 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -46,7 +46,7 @@ from scipy.optimize import linear_sum_assignment if is_timm_available(): - from timm.models 
import create_model + from timm import create_model if is_vision_available(): from transformers.image_transforms import center_to_corners_format diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index 84abb1386bd2cf..f631c59b75d40e 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -132,13 +132,9 @@ # TODO (ydshieh): Check the failing cases, try to fix them or move some cases to the above block once we are sure SPECIAL_CASES_TO_ALLOW.update( { - # Has no longer used attributes that will be removed after a deprecation cycle in v4.40 - "ConditionalDetrConfig": True, "CLIPSegConfig": True, "DeformableDetrConfig": True, "DetaConfig": True, - # Has no longer used attributes that will be removed after a deprecation cycle in v4.40 - "DetrConfig": True, "DinatConfig": True, "DonutSwinConfig": True, "EfficientFormerConfig": True, From 7d4e93abe654bfb8c3a0491d4952c7b98ac3cccf Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 13:16:29 +0000 Subject: [PATCH 24/32] Whoops --- .../models/conditional_detr/modeling_conditional_detr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 6adc90a328cd35..67c3c530723f8d 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -338,7 +338,7 @@ def replace_batch_norm(model): replace_batch_norm(module) -# Copied from transformers.models.detr.modeling_detr.DetrConvEncoder with Detr >ConditionalDetr +# Copied from transformers.models.detr.modeling_detr.DetrConvEncoder with Detr->ConditionalDetr class ConditionalDetrConvEncoder(nn.Module): """ Convolutional backbone, using either the AutoBackbone API or one from the timm library. From ac56450ff8d4041e48b30618a4f371611350f20c Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 13:20:05 +0000 Subject: [PATCH 25/32] Tidy up --- .../modeling_conditional_detr.py | 2 +- .../modeling_deformable_detr.py | 32 +++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 67c3c530723f8d..bcc86b5bdcd44f 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -343,7 +343,7 @@ class ConditionalDetrConvEncoder(nn.Module): """ Convolutional backbone, using either the AutoBackbone API or one from the timm library. - nn.BatchNorm2d layers are replaced by DetrFrozenBatchNorm2d as defined above. + nn.BatchNorm2d layers are replaced by ConditionalDetrFrozenBatchNorm2d as defined above. 
""" diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index 52eeb14ebbbb87..5d4bf9023c07e0 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -88,14 +88,31 @@ def load_cuda_kernels(): if is_vision_available(): from transformers.image_transforms import center_to_corners_format + if is_accelerate_available(): from accelerate import PartialState from accelerate.utils import reduce + if is_timm_available(): from timm import create_model +if is_scipy_available(): + from scipy.optimize import linear_sum_assignment + + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "DeformableDetrConfig" +_CHECKPOINT_FOR_DOC = "sensetime/deformable-detr" + +DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "sensetime/deformable-detr", + # See all Deformable DETR models at https://huggingface.co/models?filter=deformable-detr +] + + class MultiScaleDeformableAttentionFunction(Function): @staticmethod def forward( @@ -144,19 +161,6 @@ def backward(context, grad_output): return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None -if is_scipy_available(): - from scipy.optimize import linear_sum_assignment - - -logger = logging.get_logger(__name__) - -_CONFIG_FOR_DOC = "DeformableDetrConfig" -_CHECKPOINT_FOR_DOC = "sensetime/deformable-detr" - - -from ..deprecated._archive_maps import DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST # noqa: F401, E402 - - @dataclass class DeformableDetrDecoderOutput(ModelOutput): """ @@ -433,7 +437,7 @@ def __init__(self, config): pretrained=config.use_pretrained_backbone, features_only=True, out_indices=out_indices, - in_chans=in_chans, + in_chans=num_channels, **kwargs, ) else: From 1c55822e8630bc7533563f6908430d9681b44185 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:09:02 +0000 Subject: [PATCH 26/32] Handle when kwargs are none --- .../models/conditional_detr/modeling_conditional_detr.py | 1 + .../models/deformable_detr/modeling_deformable_detr.py | 1 + src/transformers/models/detr/modeling_detr.py | 1 + .../models/table_transformer/modeling_table_transformer.py | 1 + 4 files changed, 4 insertions(+) diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index bcc86b5bdcd44f..f2d762addbb2ca 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -356,6 +356,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) + kwargs = {} if kwargs is None else kwargs out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index 5d4bf9023c07e0..61a5c7a1f112ad 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -428,6 +428,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) + kwargs = {} if 
kwargs is None else kwargs out_indices = kwargs.pop("out_indices", (2, 3, 4) if config.num_feature_levels > 1 else (4,)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 8b5c47b45bda97..67cde1da2ddce0 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -351,6 +351,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) + kwargs = {} if kwargs is None else kwargs out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index f06de5d8d2fbd8..a600c50c6f1b8d 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -283,6 +283,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) + kwargs = {} if kwargs is None else kwargs out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: From 30d3232dcc3c6d7c7764ff65b40a228c471e2042 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:38:28 +0000 Subject: [PATCH 27/32] Update tests --- .../models/conditional_detr/test_modeling_conditional_detr.py | 3 +++ tests/models/deformable_detr/test_modeling_deformable_detr.py | 3 +++ tests/models/detr/test_modeling_detr.py | 3 +++ .../table_transformer/test_modeling_table_transformer.py | 3 +++ 4 files changed, 12 insertions(+) diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index bc53f826ed2125..f541d0e6dc27e7 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -465,6 +465,9 @@ def test_different_timm_backbone(self): self.assertTrue(outputs) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) + def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 7a83c4f1ed80a8..dc3fd57a611204 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -541,6 +541,9 @@ def test_different_timm_backbone(self): self.assertTrue(outputs) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) + def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index e581ce1729668b..63ce05ef41164f 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -465,6 +465,9 @@ def 
test_different_timm_backbone(self): self.assertTrue(outputs) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) + def test_greyscale_images(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 8f3fd1d7e8f8b7..a1b0303793eff7 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -477,6 +477,9 @@ def test_different_timm_backbone(self): self.assertTrue(outputs) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) + def test_greyscale_images(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() From 818343685b07b7972f185c18165d86540ae5add2 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:53:24 +0000 Subject: [PATCH 28/32] Revert test changes --- tests/models/detr/test_modeling_detr.py | 1 + .../models/table_transformer/test_modeling_table_transformer.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index 63ce05ef41164f..d2a45fdf2c6433 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -477,6 +477,7 @@ def test_greyscale_images(self): ) # let's set num_channels to 1 + config.num_channels = 1 config.backbone_config.num_channels = 1 for model_class in self.all_model_classes: diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index a1b0303793eff7..01e00531d55655 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -489,6 +489,7 @@ def test_greyscale_images(self): ) # let's set num_channels to 1 + config.num_channels = 1 config.backbone_config.num_channels = 1 for model_class in self.all_model_classes: From 61fe67360250ac1526dc432bee3666953789de0f Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:58:15 +0000 Subject: [PATCH 29/32] Deformable detr test - don't use default --- .../models/deformable_detr/test_modeling_deformable_detr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index dc3fd57a611204..1413fd0f072809 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -521,8 +521,9 @@ def test_different_timm_backbone(self): # let's pick a random timm backbone config.backbone = "tf_mobilenetv3_small_075" - config.use_timm_backbone = True config.backbone_config = None + config.use_timm_backbone = True + config.backbone_kwargs = {"out_indices": [1, 2, 3, 4]} for model_class in self.all_model_classes: model = model_class(config) @@ -542,7 +543,7 @@ def test_different_timm_backbone(self): self.assertTrue(outputs) # Confirm out_indices was propogated to backbone - self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) + 
self.assertEqual(len(model.backbone.intermediate_channel_sizes), 4) def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() From 94309d9284c10f56921d534b5a75aaa0801ad98b Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:06:07 +0000 Subject: [PATCH 30/32] Don't mutate; correct model attributes --- .../conditional_detr/modeling_conditional_detr.py | 2 +- .../deformable_detr/modeling_deformable_detr.py | 2 +- src/transformers/models/detr/modeling_detr.py | 2 +- .../table_transformer/modeling_table_transformer.py | 2 +- .../test_modeling_conditional_detr.py | 11 ++++++++--- .../deformable_detr/test_modeling_deformable_detr.py | 11 ++++++++--- tests/models/detr/test_modeling_detr.py | 11 ++++++++--- .../test_modeling_table_transformer.py | 8 +++++--- 8 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index f2d762addbb2ca..c464a41861b37a 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -356,7 +356,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) - kwargs = {} if kwargs is None else kwargs + kwargs = {} if kwargs is None else kwargs.copy() out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index 61a5c7a1f112ad..62b9e8768da765 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -428,7 +428,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) - kwargs = {} if kwargs is None else kwargs + kwargs = {} if kwargs is None else kwargs.copy() out_indices = kwargs.pop("out_indices", (2, 3, 4) if config.num_feature_levels > 1 else (4,)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 67cde1da2ddce0..98aa634ea2fb54 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -351,7 +351,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) - kwargs = {} if kwargs is None else kwargs + kwargs = {} if kwargs is None else kwargs.copy() out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index a600c50c6f1b8d..ee98521d759255 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -283,7 +283,7 @@ def __init__(self, config): if config.use_timm_backbone: requires_backends(self, ["timm"]) kwargs 
= getattr(config, "backbone_kwargs", {}) - kwargs = {} if kwargs is None else kwargs + kwargs = {} if kwargs is None else kwargs.copy() out_indices = kwargs.pop("out_indices", (1, 2, 3, 4)) num_channels = kwargs.pop("in_chans", config.num_channels) if config.dilation: diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py index f541d0e6dc27e7..c3f77614b4dd31 100644 --- a/tests/models/conditional_detr/test_modeling_conditional_detr.py +++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py @@ -462,12 +462,17 @@ def test_different_timm_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) + elif model_class.__name__ == "ConditionalDetrForSegmentation": + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.conditional_detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) + else: + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) - # Confirm out_indices was propogated to backbone - self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index 1413fd0f072809..36be099790a45b 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -539,12 +539,17 @@ def test_different_timm_backbone(self): self.model_tester.num_labels, ) self.assertEqual(outputs.logits.shape, expected_shape) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 4) + elif model_class.__name__ == "ConditionalDetrForSegmentation": + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.deformable_detr.model.backbone.conv_encoder.intermediate_channel_sizes), 4) + else: + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 4) self.assertTrue(outputs) - # Confirm out_indices was propogated to backbone - self.assertEqual(len(model.backbone.intermediate_channel_sizes), 4) - def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index d2a45fdf2c6433..27092c626dd46d 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -462,12 +462,17 @@ def test_different_timm_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) + elif model_class.__name__ == "DetrForSegmentation": + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.detr.model.backbone.conv_encoder.intermediate_channel_sizes), 3) + else: + # Confirm out_indices was propogated to backbone + 
self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) - # Confirm out_indices was propogated to backbone - self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) - def test_greyscale_images(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 01e00531d55655..d323083eb7f1d4 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -474,12 +474,14 @@ def test_different_timm_backbone(self): self.model_tester.num_labels + 1, ) self.assertEqual(outputs.logits.shape, expected_shape) + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3) + else: + # Confirm out_indices was propogated to backbone + self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3) self.assertTrue(outputs) - # Confirm out_indices was propogated to backbone - self.assertEqual(len(model.backbone.intermediate_channel_sizes), 3) - def test_greyscale_images(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() From 80b32cc8a6d2a5c9cd223e518beb00db18d64201 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:59:53 +0000 Subject: [PATCH 31/32] Add some clarifying comments --- .../models/conditional_detr/configuration_conditional_detr.py | 2 ++ .../models/conditional_detr/modeling_conditional_detr.py | 2 ++ .../models/deformable_detr/configuration_deformable_detr.py | 2 ++ .../models/deformable_detr/modeling_deformable_detr.py | 3 +++ src/transformers/models/detr/configuration_detr.py | 2 ++ src/transformers/models/detr/modeling_detr.py | 2 ++ .../table_transformer/configuration_table_transformer.py | 2 ++ .../models/table_transformer/modeling_table_transformer.py | 2 ++ 8 files changed, 17 insertions(+) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 88c3a4d51a19d0..f89e9bc04f2807 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -192,6 +192,8 @@ def __init__( if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") + # We default to values which were previously hard-coded in the model. This enables configurability of config + # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} if dilation: diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index c464a41861b37a..d723d3866ea416 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -354,6 +354,8 @@ def __init__(self, config): # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API if config.use_timm_backbone: + # We default to values which were previously hard-coded. 
This enables configurability from the config + # using backbone arguments, while keeping the default behavior the same. requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) kwargs = {} if kwargs is None else kwargs.copy() diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 41642ec8783c94..5fd6bdb82f5839 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -212,6 +212,8 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + # We default to values which were previously hard-coded in the model. This enables configurability of config + # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} if dilation: diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index 62b9e8768da765..7b2bbb9b1242c9 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -425,7 +425,10 @@ def __init__(self, config): self.config = config + # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API if config.use_timm_backbone: + # We default to values which were previously hard-coded. This enables configurability from the config + # using backbone arguments, while keeping the default behavior the same. requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) kwargs = {} if kwargs is None else kwargs.copy() diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index c5dafa8f9b7649..1606618fc4d8c3 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -193,6 +193,8 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + # We default to values which were previously hard-coded in the model. This enables configurability of config + # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} if dilation: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 98aa634ea2fb54..0da702db8b67e2 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -349,6 +349,8 @@ def __init__(self, config): # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API if config.use_timm_backbone: + # We default to values which were previously hard-coded. This enables configurability from the config + # using backbone arguments, while keeping the default behavior the same. 
requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) kwargs = {} if kwargs is None else kwargs.copy() diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index c7f14b9d202b3b..037f2c82929001 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -193,6 +193,8 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + # We default to values which were previously hard-coded in the model. This enables configurability of config + # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} if dilation: diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index ee98521d759255..9a684ee121ddca 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -281,6 +281,8 @@ def __init__(self, config): # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API if config.use_timm_backbone: + # We default to values which were previously hard-coded. This enables configurability from the config + # using backbone arguments, while keeping the default behavior the same. requires_backends(self, ["timm"]) kwargs = getattr(config, "backbone_kwargs", {}) kwargs = {} if kwargs is None else kwargs.copy() From 5a9799c1649a98841033895399c7e3d25202f550 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 26 Apr 2024 11:04:06 +0000 Subject: [PATCH 32/32] nit - grammar is hard --- .../models/conditional_detr/configuration_conditional_detr.py | 2 +- .../models/deformable_detr/configuration_deformable_detr.py | 2 +- src/transformers/models/detr/configuration_detr.py | 2 +- .../models/table_transformer/configuration_table_transformer.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index f89e9bc04f2807..4f95de3582f082 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -192,7 +192,7 @@ def __init__( if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") - # We default to values which were previously hard-coded in the model. This enables configurability of config + # We default to values which were previously hard-coded in the model. This enables configurability of the config # while keeping the default behavior the same. 
if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 5fd6bdb82f5839..3f3ffff69ff2e9 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -212,7 +212,7 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - # We default to values which were previously hard-coded in the model. This enables configurability of config + # We default to values which were previously hard-coded in the model. This enables configurability of the config # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 1606618fc4d8c3..db180ef1d41fed 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -193,7 +193,7 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - # We default to values which were previously hard-coded in the model. This enables configurability of config + # We default to values which were previously hard-coded in the model. This enables configurability of the config # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {} diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 037f2c82929001..4963396024a57e 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -193,7 +193,7 @@ def __init__( if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") - # We default to values which were previously hard-coded in the model. This enables configurability of config + # We default to values which were previously hard-coded in the model. This enables configurability of the config # while keeping the default behavior the same. if use_timm_backbone and backbone_kwargs is None: backbone_kwargs = {}
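End-to-end, the series leaves the DETR-family configs accepting a timm backbone name plus backbone_kwargs, with out_indices forwarded to timm instead of being hard-coded. A minimal usage sketch of the resulting surface, assuming timm is installed; the checkpoint and indices are simply the values the updated tests set, pretrained weights are turned off so nothing is downloaded, and the snippet itself is not part of the patch.

    from transformers import DetrConfig, DetrForObjectDetection

    config = DetrConfig(
        use_timm_backbone=True,
        backbone="tf_mobilenetv3_small_075",         # timm model name, as in the updated tests
        use_pretrained_backbone=False,
        backbone_config=None,
        backbone_kwargs={"out_indices": [2, 3, 4]},  # forwarded to timm.create_model
    )
    model = DetrForObjectDetection(config)

    # The conv encoder now exposes one channel size per requested stage,
    # which is what the new test assertions check.
    print(len(model.model.backbone.conv_encoder.intermediate_channel_sizes))  # 3

The non-timm route instead goes through load_backbone with a backbone_config (or a Hub checkpoint plus backbone_kwargs), as exercised by the MaskFormerConfig/ResNetBackbone test at the top of this section.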