Commit

Add custom architecture files to enable VLM merging of Florence and Qwen-VL models. Minor change to architecture.py to handle nested config keys.
ElliotStein committed Oct 10, 2024
1 parent 459121e commit e9e19bf
Showing 5 changed files with 297 additions and 1 deletion.
11 changes: 11 additions & 0 deletions examples/vlm-linear.yml
@@ -0,0 +1,11 @@
models:
  # - model: Qwen/Qwen2-VL-7B-Instruct
  - model: microsoft/Florence-2-base
    parameters:
      weight: 1.0
  # - model: impactframes/Qwen2-VL-7B-Captioner
  - model: maxiw/Florence-2-ScreenQA-base
    parameters:
      weight: 1.0
merge_method: linear
dtype: float16
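
As written, this config takes an equal-weight average of two Florence-2 checkpoints; the commented-out lines swap in the Qwen2-VL pair instead. It would normally be run with the mergekit-yaml CLI (mergekit-yaml examples/vlm-linear.yml ./merged). A minimal sketch of the equivalent library call, assuming the run_merge entry point described in mergekit's README; the output directory is illustrative:

# Minimal sketch: run the example config through mergekit's Python API.
# Assumes the run_merge entry point from mergekit's README; the output
# directory name is illustrative.
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

with open("examples/vlm-linear.yml", "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    merge_config,
    "./florence-linear-merged",
    options=MergeOptions(copy_tokenizer=True),
)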
18 changes: 18 additions & 0 deletions examples/vlm-ties.yml
@@ -0,0 +1,18 @@
models:
  # - model: impactframes/Qwen2-VL-7B-Captioner
  - model: maxiw/Florence-2-ScreenQA-base
    parameters:
      density: [1, 0.7, 0.1] # density gradient
      weight: 1.0
  # - model: Qwen/Qwen2-VL-7B-Instruct
  - model: microsoft/Florence-2-base
    parameters:
      density: 0.5
      weight: [0, 0.3, 0.7, 1] # weight gradient
merge_method: ties
# base_model: Qwen/Qwen2-VL-7B-Instruct
base_model: microsoft/Florence-2-base
parameters:
  normalize: true
  int8_mask: true
dtype: float16
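
The list-valued density and weight entries are gradients: mergekit interpolates a list across layer depth so that each layer receives its own scalar. A rough illustration of that interpolation (hypothetical helper, not mergekit's exact implementation):

# Rough sketch of how a gradient list like [0, 0.3, 0.7, 1] could map to
# per-layer values: anchor points spread evenly over depth, linearly
# interpolated in between. Hypothetical helper, not mergekit's exact code.
from typing import List


def gradient_value(anchors: List[float], layer_index: int, num_layers: int) -> float:
    if num_layers <= 1 or len(anchors) == 1:
        return anchors[0]
    # Position of this layer in [0, 1], scaled onto the anchor intervals.
    pos = layer_index / (num_layers - 1) * (len(anchors) - 1)
    lo = min(int(pos), len(anchors) - 2)
    frac = pos - lo
    return anchors[lo] * (1 - frac) + anchors[lo + 1] * frac


# The weight gradient from the config above, over a hypothetical 12-layer model:
print([round(gradient_value([0, 0.3, 0.7, 1], i, 12), 3) for i in range(12)])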
158 changes: 158 additions & 0 deletions mergekit/_data/architectures/florence.json
@@ -0,0 +1,158 @@
{
"model_type": "florence2",
"architectures": [
"Florence2ForConditionalGeneration"
],
"pre_weights": [
{
"name": "language_model.model.shared.weight",
"is_embed": true
}
],
"post_weights": [
{
"name": "image_proj_norm.weight"
},
{
"name": "image_proj_norm.bias"
},
{
"name": "language_model.lm_head.weight",
"is_embed": true,
"aliases": [
"language_model.model.shared.weight"
]
}
],
"num_layers_config_key": "text_config.num_hidden_layers",
"layer_templates": {
"weights": [
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.k_proj.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.k_proj.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.v_proj.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.v_proj.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.q_proj.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.q_proj.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.out_proj.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn.out_proj.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn_layer_norm.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.self_attn_layer_norm.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.fc1.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.fc1.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.fc2.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.fc2.bias"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.final_layer_norm.weight"
},
{
"name": "language_model.model.encoder.layers.${layer_index}.final_layer_norm.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.k_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.k_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.v_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.v_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.q_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.q_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.out_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn.out_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn_layer_norm.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.self_attn_layer_norm.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.k_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.k_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.v_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.v_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.q_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.q_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.out_proj.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.out_proj.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn_layer_norm.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.encoder_attn_layer_norm.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.fc1.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.fc1.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.fc2.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.fc2.bias"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.final_layer_norm.weight"
},
{
"name": "language_model.model.decoder.layers.${layer_index}.final_layer_norm.bias"
}
]
}
}
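
Each entry under layer_templates.weights is a template: the ${layer_index} placeholder is expanded once per layer to yield concrete tensor names, with pre_weights and post_weights added outside the loop. A small sketch of that expansion against this schema (illustrative only, not mergekit's internals; the layer count is arbitrary):

# Small sketch: expand an architecture JSON into the full set of tensor names
# it expects. Illustrative only; mergekit has its own template handling.
import json
from string import Template


def expected_names(arch_path: str, num_layers: int) -> set:
    with open(arch_path, "r", encoding="utf-8") as fp:
        arch = json.load(fp)
    names = {w["name"] for w in arch["pre_weights"] + arch["post_weights"]}
    for idx in range(num_layers):
        for w in arch["layer_templates"]["weights"]:
            names.add(Template(w["name"]).substitute(layer_index=idx))
    return names


# Compare against a loaded checkpoint, e.g. set(model.state_dict().keys()):
names = expected_names("mergekit/_data/architectures/florence.json", 6)
print(len(names), "tensor names expected for an illustrative 6-layer stack")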
101 changes: 101 additions & 0 deletions mergekit/_data/architectures/qwen2_vl.json
@@ -0,0 +1,101 @@
{
"model_type": "qwen2",
"architectures": [
"Qwen2VLForConditionalGeneration"
],
"pre_weights": [
{
"name": "visual.patch_embed.proj.weight",
"is_embed": true
}
],
"post_weights": [
{
"name": "model.norm.weight"
},
{
"name": "lm_head.weight",
"is_embed": true,
"aliases": [
"model.embed_tokens.weight"
]
}
],
"num_layers_config_key": "num_hidden_layers",
"layer_templates": {
"weights": [
{
"name": "visual.blocks.${layer_index}.norm1.weight"
},
{
"name": "visual.blocks.${layer_index}.norm1.bias"
},
{
"name": "visual.blocks.${layer_index}.norm2.weight"
},
{
"name": "visual.blocks.${layer_index}.norm2.bias"
},
{
"name": "visual.blocks.${layer_index}.attn.qkv.weight"
},
{
"name": "visual.blocks.${layer_index}.attn.qkv.bias"
},
{
"name": "visual.blocks.${layer_index}.attn.proj.weight"
},
{
"name": "visual.blocks.${layer_index}.attn.proj.bias"
},
{
"name": "visual.blocks.${layer_index}.mlp.fc1.weight"
},
{
"name": "visual.blocks.${layer_index}.mlp.fc1.bias"
},
{
"name": "visual.blocks.${layer_index}.mlp.fc2.weight"
},
{
"name": "visual.blocks.${layer_index}.mlp.fc2.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.q_proj.weight"
},
{
"name": "model.layers.${layer_index}.self_attn.q_proj.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.k_proj.weight"
},
{
"name": "model.layers.${layer_index}.self_attn.k_proj.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.v_proj.weight"
},
{
"name": "model.layers.${layer_index}.self_attn.v_proj.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.o_proj.weight"
},
{
"name": "model.layers.${layer_index}.mlp.gate_proj.weight"
},
{
"name": "model.layers.${layer_index}.mlp.up_proj.weight"
},
{
"name": "model.layers.${layer_index}.mlp.down_proj.weight"
},
{
"name": "model.layers.${layer_index}.input_layernorm.weight"
},
{
"name": "model.layers.${layer_index}.post_attention_layernorm.weight"
}
]
}
}
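
The aliases list on lm_head.weight accommodates checkpoints that tie input and output embeddings and therefore store only the embedding tensor: lookup can fall back to the alias when the primary name is absent. A sketch of that fallback (hypothetical helper mirroring the apparent intent of the aliases field, not mergekit's code):

# Hypothetical sketch of alias fallback: try the primary tensor name, then
# each alias in order. Mirrors the apparent intent of the "aliases" field.
from typing import Dict, List

import torch


def load_tensor(
    state_dict: Dict[str, torch.Tensor], name: str, aliases: List[str]
) -> torch.Tensor:
    for candidate in [name, *aliases]:
        if candidate in state_dict:
            return state_dict[candidate]
    raise KeyError(f"{name} not found under any alias")


# A checkpoint with tied embeddings may store only embed_tokens:
sd = {"model.embed_tokens.weight": torch.zeros(8, 4)}
print(load_tensor(sd, "lm_head.weight", ["model.embed_tokens.weight"]).shape)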
10 changes: 9 additions & 1 deletion mergekit/architecture.py
@@ -107,7 +107,15 @@ def num_layers_config_key(self) -> str:

     def num_layers(self, config: PretrainedConfig) -> int:
         """Return the number of layers in a model."""
-        return getattr(config, self.num_layers_config_key())
+        # Split the num_layers_config_key by '.' to handle nested attributes
+        keys = self.num_layers_config_key().split('.')
+
+        # Traverse the nested attributes based on the keys
+        attr = config
+        for key in keys:
+            attr = getattr(attr, key)
+
+        return attr
 
     def all_weights(self, config: PretrainedConfig) -> List[WeightInfo]:
         """Return all weights associated with a model."""