From 75a8ccb6949850776e0362189cbce0b70008ceb0 Mon Sep 17 00:00:00 2001 From: Niels Date: Sat, 20 Apr 2024 13:44:10 +0200 Subject: [PATCH] Add backbone_hidden_size --- src/transformers/models/zoedepth/configuration_zoedepth.py | 4 ++++ src/transformers/models/zoedepth/modeling_zoedepth.py | 6 +++--- tests/models/zoedepth/test_modeling_zoedepth.py | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/zoedepth/configuration_zoedepth.py b/src/transformers/models/zoedepth/configuration_zoedepth.py index 0a7bee732ad7ee..5ecdfb358cba5a 100644 --- a/src/transformers/models/zoedepth/configuration_zoedepth.py +++ b/src/transformers/models/zoedepth/configuration_zoedepth.py @@ -63,6 +63,8 @@ class ZoeDepthConfig(PretrainedConfig): - "project" passes information to the other tokens by concatenating the readout to all other tokens before projecting the representation to the original feature dimension D using a linear layer followed by a GELU non-linearity. + backbone_hidden_size (`int`, *optional*, defaults to 1024): + The hidden size of the backbone. reassemble_factors (`List[int]`, *optional*, defaults to `[4, 2, 1, 0.5]`): The up/downsampling factors of the reassemble layers. neck_hidden_sizes (`List[str]`, *optional*, defaults to `[96, 192, 384, 768]`): @@ -133,6 +135,7 @@ def __init__( hidden_act="gelu", initializer_range=0.02, readout_type="project", + backbone_hidden_size=1024, reassemble_factors=[4, 2, 1, 0.5], neck_hidden_sizes=[96, 192, 384, 768], fusion_hidden_size=256, @@ -198,6 +201,7 @@ def __init__( self.hidden_act = hidden_act self.use_pretrained_backbone = use_pretrained_backbone self.initializer_range = initializer_range + self.backbone_hidden_size = backbone_hidden_size self.readout_type = readout_type self.reassemble_factors = reassemble_factors self.neck_hidden_sizes = neck_hidden_sizes diff --git a/src/transformers/models/zoedepth/modeling_zoedepth.py b/src/transformers/models/zoedepth/modeling_zoedepth.py index 18dec8d8404c3c..748cf589c8b829 100644 --- a/src/transformers/models/zoedepth/modeling_zoedepth.py +++ b/src/transformers/models/zoedepth/modeling_zoedepth.py @@ -109,7 +109,7 @@ def __init__(self, config): if config.readout_type == "project": self.readout_projects = nn.ModuleList() - hidden_size = config.backbone_config.hidden_size + hidden_size = config.backbone_hidden_size for _ in range(len(config.neck_hidden_sizes)): self.readout_projects.append( nn.Sequential(nn.Linear(2 * hidden_size, hidden_size), ACT2FN[config.hidden_act]) @@ -156,7 +156,7 @@ class ZoeDepthReassembleLayer(nn.Module): def __init__(self, config, channels, factor): super().__init__() # projection - hidden_size = config.backbone_config.hidden_size + hidden_size = config.backbone_hidden_size self.projection = nn.Conv2d(in_channels=hidden_size, out_channels=channels, kernel_size=1) # up/down sampling depending on factor @@ -1302,7 +1302,7 @@ def forward( hidden_states = outputs.feature_maps _, _, height, width = pixel_values.shape - patch_size = self.config.backbone_config.patch_size + patch_size = self.backbone.config.patch_size patch_height = height // patch_size patch_width = width // patch_size diff --git a/tests/models/zoedepth/test_modeling_zoedepth.py b/tests/models/zoedepth/test_modeling_zoedepth.py index b132eb72cc1062..d400ca46b5ef86 100644 --- a/tests/models/zoedepth/test_modeling_zoedepth.py +++ b/tests/models/zoedepth/test_modeling_zoedepth.py @@ -56,6 +56,7 @@ def __init__( out_features=["stage1", "stage2"], apply_layernorm=False, reshape_hidden_states=False, + backbone_hidden_size=4, neck_hidden_sizes=[2, 2], fusion_hidden_size=6, bottleneck_features=6, @@ -76,6 +77,7 @@ def __init__( self.use_labels = use_labels self.num_labels = num_labels self.is_training = is_training + self.backbone_hidden_size = backbone_hidden_size self.neck_hidden_sizes = neck_hidden_sizes self.fusion_hidden_size = fusion_hidden_size self.bottleneck_features = bottleneck_features @@ -98,6 +100,7 @@ def get_config(self): return ZoeDepthConfig( backbone_config=self.get_backbone_config(), backbone=None, + backbone_hidden_size=self.backbone_hidden_size, neck_hidden_sizes=self.neck_hidden_sizes, fusion_hidden_size=self.fusion_hidden_size, bottleneck_features=self.bottleneck_features,