From b817c56a27d6cc6bd3b473d3bc8848a5edbc9c71 Mon Sep 17 00:00:00 2001
From: Zhenzhong1
Date: Tue, 5 Mar 2024 23:12:56 -0800
Subject: [PATCH] fixed qwen convert issues.

---
 neural_speed/convert/convert_qwen.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/neural_speed/convert/convert_qwen.py b/neural_speed/convert/convert_qwen.py
index 7966717b2..bcdacbdc2 100644
--- a/neural_speed/convert/convert_qwen.py
+++ b/neural_speed/convert/convert_qwen.py
@@ -100,11 +100,13 @@ def main(args_in: Optional[List[str]] = None) -> None:
     fout.write(struct.pack("i", hparams["num_attention_heads"]))
     fout.write(struct.pack("i", 0))  # multi-query attention
     fout.write(struct.pack("i", hparams["num_hidden_layers"]))
-    fout.write(struct.pack("i", hparams["kv_channels"] if "kv_channels" in hparams
-                           else int(hparams["hidden_size"]/hparams["num_attention_heads"])))
+    fout.write(
+        struct.pack(
+            "i", hparams["kv_channels"] if "kv_channels" in hparams else int(hparams["hidden_size"] /
+                                                                             hparams["num_attention_heads"])))
     fout.write(struct.pack("i", ftype))
-    fout.write(struct.pack("i", hparams["seq_length"] if "seq_length" in hparams
-                           else hparams["max_position_embeddings"]))
+    fout.write(
+        struct.pack("i", hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("f", 0.0))
     fout.write(struct.pack("i", 0))
@@ -120,13 +122,15 @@ def main(args_in: Optional[List[str]] = None) -> None:
     fout.write(struct.pack("f", 10000.0))  # freq_base
     fout.write(struct.pack("f", 1.0))  # rope_factor
-    fout.write(struct.pack("f", 0.0)) # config.json "rope_scaling.factor", not enabled
-    fout.write(struct.pack("i", 0)) # rope_scaling.original_max_position_embeddings
-    fout.write(struct.pack("i", 0)) # params["rope_scaling"]["type"] =="yarn" else 0))
-    fout.write(struct.pack("i", hparams["bos_token_id"] if hparams["bos_token_id"]
-                           else tokenizer.special_tokens['<|endoftext|>']))
-    fout.write(struct.pack("i", hparams["eos_token_id"] if hparams["eos_token_id"]
-                           else tokenizer.special_tokens['<|endoftext|>']))
+    fout.write(struct.pack("f", 0.0))  # config.json "rope_scaling.factor", not enabled
+    fout.write(struct.pack("i", 0))  # rope_scaling.original_max_position_embeddings
+    fout.write(struct.pack("i", 0))  # params["rope_scaling"]["type"] =="yarn" else 0))
+    fout.write(
+        struct.pack(
+            "i", hparams["bos_token_id"] if "bos_token_id" in hparams else tokenizer.special_tokens['<|endoftext|>']))
+    fout.write(
+        struct.pack(
+            "i", hparams["eos_token_id"] if "eos_token_id" in hparams else tokenizer.special_tokens['<|endoftext|>']))
     fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
     fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))
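
The substantive fix is in the bos/eos lines: the old code used truthiness checks (`hparams["bos_token_id"] if hparams["bos_token_id"] ...`), which raise KeyError when a Qwen config.json omits the key; the patch switches to key-presence checks with explicit fallbacks, matching the pattern already used for `kv_channels` and `seq_length`. Below is a minimal, self-contained sketch of that fallback pattern, not the converter itself: the `hparams` values, the `special_tokens` dict, and the in-memory buffer are invented for illustration.

```python
import io
import struct

# Illustrative hparams, standing in for a Qwen config.json that lacks the
# optional keys ("kv_channels", "seq_length", "bos_token_id", "eos_token_id").
hparams = {
    "hidden_size": 2048,
    "num_attention_heads": 16,
    "max_position_embeddings": 8192,
}
# Stand-in for tokenizer.special_tokens in the real converter.
special_tokens = {"<|endoftext|>": 151643}

fout = io.BytesIO()

# Key-presence checks ("x" in hparams) instead of truthiness checks, so a
# missing key falls back to a derived or default value rather than raising.
kv_channels = hparams["kv_channels"] if "kv_channels" in hparams else int(
    hparams["hidden_size"] / hparams["num_attention_heads"])
seq_length = hparams["seq_length"] if "seq_length" in hparams else hparams["max_position_embeddings"]
bos_token_id = hparams["bos_token_id"] if "bos_token_id" in hparams else special_tokens["<|endoftext|>"]
eos_token_id = hparams["eos_token_id"] if "eos_token_id" in hparams else special_tokens["<|endoftext|>"]

fout.write(struct.pack("i", kv_channels))   # head dim: 2048 / 16 = 128
fout.write(struct.pack("i", seq_length))    # falls back to max_position_embeddings
fout.write(struct.pack("i", bos_token_id))  # falls back to the <|endoftext|> id
fout.write(struct.pack("i", eos_token_id))

print(struct.unpack("4i", fout.getvalue()))  # (128, 8192, 151643, 151643)
```

The remaining hunks only reflow the same expressions across lines (yapf-style wrapping) without changing the values written to the header.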