This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit c6ce4e7
fix qwen convert error
Signed-off-by: intellinjun <[email protected]>
intellinjun committed Jun 11, 2024
1 parent c57d25f commit c6ce4e7
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion neural_speed/convert/convert_quantized_qwen.py
@@ -66,7 +66,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
 f.write(struct.pack("i", hparams["intermediate_size"])) # dummy data
 f.write(struct.pack("i", hparams["num_attention_heads"]))
 f.write(struct.pack("i", hparams["num_key_value_heads"] if "num_key_value_heads" in hparams
-                    else ["num_attention_heads"])) # multi-query attention
+                    else hparams["num_attention_heads"])) # multi-query attention
 f.write(struct.pack("i", hparams["num_hidden_layers"]))
 f.write(
     struct.pack(
2 changes: 1 addition & 1 deletion neural_speed/convert/convert_qwen.py
@@ -104,7 +104,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
 fout.write(struct.pack("i", hparams["intermediate_size"])) # dummy data
 fout.write(struct.pack("i", hparams["num_attention_heads"]))
 fout.write(struct.pack("i", hparams["num_key_value_heads"] if "num_key_value_heads" in hparams
-                       else ["num_attention_heads"])) # multi-query attention
+                       else hparams["num_attention_heads"])) # multi-query attention
 fout.write(struct.pack("i", hparams["num_hidden_layers"]))
 fout.write(
     struct.pack(
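
Both files receive the same one-line fix. Before the change, the fallback branch of the conditional passed the literal list ["num_attention_heads"] to struct.pack("i", ...), which expects an integer and therefore fails for any Qwen config that has no num_key_value_heads entry; the fix looks the value up in hparams instead. A minimal sketch of the corrected fallback (the hparams values below are hypothetical, not taken from the repo):

import struct

# Hypothetical hparams dict, standing in for a Qwen config.json that lacks
# the "num_key_value_heads" field.
hparams = {"num_attention_heads": 32}

# The fixed expression: fall back to num_attention_heads (multi-query attention).
n_kv_heads = (hparams["num_key_value_heads"] if "num_key_value_heads" in hparams
              else hparams["num_attention_heads"])

# struct.pack("i", ...) requires an int; the pre-fix fallback passed the list
# ["num_attention_heads"] here, which raises struct.error during conversion.
packed = struct.pack("i", n_kv_heads)
print(n_kv_heads, packed)  # 32 b' \x00\x00\x00'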
