From e9e19bfaa45e741478659b9549fca6b0408268fb Mon Sep 17 00:00:00 2001 From: Elliot Stein Date: Thu, 10 Oct 2024 16:22:12 +0000 Subject: [PATCH] Add custom architecture files to enable VLM merging of florence and qwen-vl models. Minor change to architecture.py to handle nested config keys --- examples/vlm-linear.yml | 11 ++ examples/vlm-ties.yml | 18 +++ mergekit/_data/architectures/florence.json | 158 +++++++++++++++++++++ mergekit/_data/architectures/qwen2_vl.json | 101 +++++++++++++ mergekit/architecture.py | 10 +- 5 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 examples/vlm-linear.yml create mode 100644 examples/vlm-ties.yml create mode 100644 mergekit/_data/architectures/florence.json create mode 100644 mergekit/_data/architectures/qwen2_vl.json diff --git a/examples/vlm-linear.yml b/examples/vlm-linear.yml new file mode 100644 index 00000000..c44f330c --- /dev/null +++ b/examples/vlm-linear.yml @@ -0,0 +1,11 @@ +models: + # - model: Qwen/Qwen2-VL-7B-Instruct + - model: microsoft/Florence-2-base + parameters: + weight: 1.0 + # - model: impactframes/Qwen2-VL-7B-Captioner + - model: maxiw/Florence-2-ScreenQA-base + parameters: + weight: 1.0 +merge_method: linear +dtype: float16 \ No newline at end of file diff --git a/examples/vlm-ties.yml b/examples/vlm-ties.yml new file mode 100644 index 00000000..e83402b1 --- /dev/null +++ b/examples/vlm-ties.yml @@ -0,0 +1,18 @@ +models: + # - model: impactframes/Qwen2-VL-7B-Captioner + - model: maxiw/Florence-2-ScreenQA-base + parameters: + density: [1, 0.7, 0.1] # density gradient + weight: 1.0 + # - model: Qwen/Qwen2-VL-7B-Instruct + - model: microsoft/Florence-2-base + parameters: + density: 0.5 + weight: [0, 0.3, 0.7, 1] # weight gradient +merge_method: ties +# base_model: Qwen/Qwen2-VL-7B-Instruct +base_model: microsoft/Florence-2-base +parameters: + normalize: true + int8_mask: true +dtype: float16 \ No newline at end of file diff --git a/mergekit/_data/architectures/florence.json b/mergekit/_data/architectures/florence.json new file mode 100644 index 00000000..ca24e0ef --- /dev/null +++ b/mergekit/_data/architectures/florence.json @@ -0,0 +1,158 @@ +{ + "model_type": "florence2", + "architectures": [ + "Florence2ForConditionalGeneration" + ], + "pre_weights": [ + { + "name": "language_model.model.shared.weight", + "is_embed": true + } + ], + "post_weights": [ + { + "name": "image_proj_norm.weight" + }, + { + "name": "image_proj_norm.bias" + }, + { + "name": "language_model.lm_head.weight", + "is_embed": true, + "aliases": [ + "language_model.model.shared.weight" + ] + } + ], + "num_layers_config_key": "text_config.num_hidden_layers", + "layer_templates": { + "weights": [ + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.k_proj.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.k_proj.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.v_proj.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.v_proj.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.q_proj.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.q_proj.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.out_proj.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn.out_proj.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn_layer_norm.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.self_attn_layer_norm.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.fc1.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.fc1.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.fc2.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.fc2.bias" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.final_layer_norm.weight" + }, + { + "name": "language_model.model.encoder.layers.${layer_index}.final_layer_norm.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.k_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.k_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.v_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.v_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.q_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.q_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.out_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn.out_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn_layer_norm.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.self_attn_layer_norm.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.k_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.k_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.v_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.v_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.q_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.q_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.out_proj.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn.out_proj.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn_layer_norm.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.encoder_attn_layer_norm.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.fc1.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.fc1.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.fc2.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.fc2.bias" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.final_layer_norm.weight" + }, + { + "name": "language_model.model.decoder.layers.${layer_index}.final_layer_norm.bias" + } + ] + } +} \ No newline at end of file diff --git a/mergekit/_data/architectures/qwen2_vl.json b/mergekit/_data/architectures/qwen2_vl.json new file mode 100644 index 00000000..aee036e8 --- /dev/null +++ b/mergekit/_data/architectures/qwen2_vl.json @@ -0,0 +1,101 @@ +{ + "model_type": "qwen2", + "architectures": [ + "Qwen2VLForConditionalGeneration" + ], + "pre_weights": [ + { + "name": "visual.patch_embed.proj.weight", + "is_embed": true + } + ], + "post_weights": [ + { + "name": "model.norm.weight" + }, + { + "name": "lm_head.weight", + "is_embed": true, + "aliases": [ + "model.embed_tokens.weight" + ] + } + ], + "num_layers_config_key": "num_hidden_layers", + "layer_templates": { + "weights": [ + { + "name": "visual.blocks.${layer_index}.norm1.weight" + }, + { + "name": "visual.blocks.${layer_index}.norm1.bias" + }, + { + "name": "visual.blocks.${layer_index}.norm2.weight" + }, + { + "name": "visual.blocks.${layer_index}.norm2.bias" + }, + { + "name": "visual.blocks.${layer_index}.attn.qkv.weight" + }, + { + "name": "visual.blocks.${layer_index}.attn.qkv.bias" + }, + { + "name": "visual.blocks.${layer_index}.attn.proj.weight" + }, + { + "name": "visual.blocks.${layer_index}.attn.proj.bias" + }, + { + "name": "visual.blocks.${layer_index}.mlp.fc1.weight" + }, + { + "name": "visual.blocks.${layer_index}.mlp.fc1.bias" + }, + { + "name": "visual.blocks.${layer_index}.mlp.fc2.weight" + }, + { + "name": "visual.blocks.${layer_index}.mlp.fc2.bias" + }, + { + "name": "model.layers.${layer_index}.self_attn.q_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.q_proj.bias" + }, + { + "name": "model.layers.${layer_index}.self_attn.k_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.k_proj.bias" + }, + { + "name": "model.layers.${layer_index}.self_attn.v_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.v_proj.bias" + }, + { + "name": "model.layers.${layer_index}.self_attn.o_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.gate_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.up_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.down_proj.weight" + }, + { + "name": "model.layers.${layer_index}.input_layernorm.weight" + }, + { + "name": "model.layers.${layer_index}.post_attention_layernorm.weight" + } + ] + } +} \ No newline at end of file diff --git a/mergekit/architecture.py b/mergekit/architecture.py index 4c7b4625..d9e89f53 100644 --- a/mergekit/architecture.py +++ b/mergekit/architecture.py @@ -107,7 +107,15 @@ def num_layers_config_key(self) -> str: def num_layers(self, config: PretrainedConfig) -> int: """Return the number of layers in a model.""" - return getattr(config, self.num_layers_config_key()) + # Split the num_layers_config_key by '.' to handle nested attributes + keys = self.num_layers_config_key().split('.') + + # Traverse the nested attributes based on the keys + attr = config + for key in keys: + attr = getattr(attr, key) + + return attr def all_weights(self, config: PretrainedConfig) -> List[WeightInfo]: """Return all weights associated with a model."""