Skip to content

Commit

Permalink
Fix Llava conversion for LlavaQwen2ForCausalLM with Clip vision tower (
Browse files Browse the repository at this point in the history
…#33613)

fix llavaqwen2 model conversion
  • Loading branch information
Isotr0py authored Sep 23, 2024
1 parent 214db9e commit be9cf07
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/transformers/models/llava/convert_llava_weights_to_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ def load_original_state_dict(model_id):
if "lm_head.weight" not in original_state_dict:
original_state_dict["lm_head.weight"] = original_state_dict["model.embed_tokens.weight"].clone()

del original_state_dict["model.image_newline"] # not used in the original implementation because "merge_type=flat"
if "model.image_newline" in original_state_dict:
# not used in the original implementation because "merge_type=flat"
del original_state_dict["model.image_newline"]
return original_state_dict


Expand Down Expand Up @@ -107,7 +109,7 @@ def convert_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_path, o
image_processor = AutoImageProcessor.from_pretrained(vision_model_id)
processor = LlavaProcessor(tokenizer=tokenizer, image_processor=image_processor)

if "Qwen" in text_model_id:
if "siglip" in vision_model_id:
vision_config = SiglipVisionConfig(
hidden_size=1152,
image_size=384,
Expand All @@ -128,8 +130,9 @@ def convert_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_path, o
# llms-lab interleave models do not use any selection strategy except for last hidden state
if "Qwen" in text_model_id:
config.image_token_index = 151646
config.vision_feature_select_strategy = "full"
config.vision_feature_layer = -1
if "siglip" in vision_model_id:
config.vision_feature_select_strategy = "full"
config.vision_feature_layer = -1
else:
config.pad_token_id = 32001
config.image_token_index = 32000
Expand Down

0 comments on commit be9cf07

Please sign in to comment.