From b817c56a27d6cc6bd3b473d3bc8848a5edbc9c71 Mon Sep 17 00:00:00 2001
From: Zhenzhong1
Date: Tue, 5 Mar 2024 23:12:56 -0800
Subject: [PATCH] fixed qwen convert issues.

---
 neural_speed/convert/convert_qwen.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/neural_speed/convert/convert_qwen.py b/neural_speed/convert/convert_qwen.py
index 7966717b2..bcdacbdc2 100644
--- a/neural_speed/convert/convert_qwen.py
+++ b/neural_speed/convert/convert_qwen.py
@@ -100,11 +100,13 @@ def main(args_in: Optional[List[str]] = None) -> None:
     fout.write(struct.pack("i", hparams["num_attention_heads"]))
     fout.write(struct.pack("i", 0))  # multi-query attention
     fout.write(struct.pack("i", hparams["num_hidden_layers"]))
-    fout.write(struct.pack("i", hparams["kv_channels"] if "kv_channels" in hparams
-                           else int(hparams["hidden_size"]/hparams["num_attention_heads"])))
+    fout.write(
+        struct.pack(
+            "i", hparams["kv_channels"] if "kv_channels" in hparams else int(hparams["hidden_size"] /
+                                                                             hparams["num_attention_heads"])))
     fout.write(struct.pack("i", ftype))
-    fout.write(struct.pack("i", hparams["seq_length"] if "seq_length" in hparams
-                           else hparams["max_position_embeddings"]))
+    fout.write(
+        struct.pack("i", hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("i", 0))
@@ -120,13 +122,15 @@ def main(args_in: Optional[List[str]] = None) -> None:
     fout.write(struct.pack("f", 10000.0))  # freq_base
     fout.write(struct.pack("f", 1.0))  # rope_factor
-    fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
-    fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
-    fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))
-    fout.write(struct.pack("i", hparams["bos_token_id"] if hparams["bos_token_id"]
-                           else tokenizer.special_tokens['<|endoftext|>']))
-    fout.write(struct.pack("i", hparams["eos_token_id"] if hparams["eos_token_id"]
-                           else tokenizer.special_tokens['<|endoftext|>']))
+    fout.write(struct.pack("f", 0.0))  # config.json "rope_scaling.factor", not enabled
+    fout.write(struct.pack("i", 0))  # rope_scaling.original_max_position_embeddings
+    fout.write(struct.pack("i", 0))  # params["rope_scaling"]["type"] =="yarn" else 0))
+    fout.write(
+        struct.pack(
+            "i", hparams["bos_token_id"] if "bos_token_id" in hparams else tokenizer.special_tokens['<|endoftext|>']))
+    fout.write(
+        struct.pack(
+            "i", hparams["eos_token_id"] if "eos_token_id" in hparams else tokenizer.special_tokens['<|endoftext|>']))
     fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
     fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))
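
The substantive fix is in the bos/eos lines: the old code used truthiness checks (`hparams["bos_token_id"] if hparams["bos_token_id"] ...`), which raise KeyError when a Qwen config.json omits the key; the patch switches to key-presence checks with explicit fallbacks, matching the pattern already used for `kv_channels` and `seq_length`. Below is a minimal, self-contained sketch of that fallback pattern, not the converter itself: the `hparams` values, the `special_tokens` dict, and the in-memory buffer are invented for illustration.

```python
import io
import struct

# Illustrative hparams, standing in for a Qwen config.json that lacks the
# optional keys ("kv_channels", "seq_length", "bos_token_id", "eos_token_id").
hparams = {
    "hidden_size": 2048,
    "num_attention_heads": 16,
    "max_position_embeddings": 8192,
}
# Stand-in for tokenizer.special_tokens in the real converter.
special_tokens = {"<|endoftext|>": 151643}

fout = io.BytesIO()

# Key-presence checks ("x" in hparams) instead of truthiness checks, so a
# missing key falls back to a derived or default value rather than raising.
kv_channels = hparams["kv_channels"] if "kv_channels" in hparams else int(
    hparams["hidden_size"] / hparams["num_attention_heads"])
seq_length = hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]
bos_token_id = hparams["bos_token_id"] if "bos_token_id" in hparams else special_tokens["<|endoftext|>"]
eos_token_id = hparams["eos_token_id"] if "eos_token_id" in hparams else special_tokens["<|endoftext|>"]

fout.write(struct.pack("i", kv_channels))   # head dim: 2048 / 16 = 128
fout.write(struct.pack("i", seq_length))    # falls back to max_position_embeddings
fout.write(struct.pack("i", bos_token_id))  # falls back to the <|endoftext|> id
fout.write(struct.pack("i", eos_token_id))

print(struct.unpack("4i", fout.getvalue()))  # (128, 8192, 151643, 151643)
```

The remaining hunks only reflow the same expressions across lines (yapf-style wrapping) without changing the values written to the header.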