diff --git a/README.md b/README.md
index 4eafb27e..c95ac6d9 100644
--- a/README.md
+++ b/README.md
@@ -336,6 +336,18 @@
 python3 tools/baichuan2flm.py baichuan-13b-int8.flm int8 # export the int8 model
 python3 tools/baichuan2flm.py baichuan-13b-int4.flm int4 # export the int4 model
 ```
+### baichuan2 model export (by default the script exports the baichuan2-7b-chat model)
+
+``` sh
+# The baichuan2 environment must be installed first.
+# If you use your own finetuned model, modify the tokenizer/model creation code in baichuan2_2flm.py.
+# Export the model at whichever precision you need.
+cd build
+python3 tools/baichuan2_2flm.py baichuan2-7b-fp16.flm float16 # export the float16 model
+python3 tools/baichuan2_2flm.py baichuan2-7b-int8.flm int8 # export the int8 model
+python3 tools/baichuan2_2flm.py baichuan2-7b-int4.flm int4 # export the int4 model
+```
+
 ### MOSS model export
 
 ``` sh
diff --git a/tools/fastllm_pytools/torch2flm.py b/tools/fastllm_pytools/torch2flm.py
index 25831adc..b81387f9 100644
--- a/tools/fastllm_pytools/torch2flm.py
+++ b/tools/fastllm_pytools/torch2flm.py
@@ -94,6 +94,12 @@ def tofile(exportPath,
         modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.user_token_id) + ">") if hasattr(model.generation_config, "user_token_id") else "";
         modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.assistant_token_id) + ">") if hasattr(model.generation_config, "assistant_token_id") else "";
         modelInfo["history_sep"] = ""
+    if (modelInfo["model_type"] == "baichuan" and modelInfo["vocab_size"] == 125696):
+        # Baichuan 2 7B
+        modelInfo["pre_prompt"] = ""
+        modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.user_token_id) + ">") if hasattr(model.generation_config, "user_token_id") else "";
+        modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(model.generation_config.assistant_token_id) + ">") if hasattr(model.generation_config, "assistant_token_id") else "";
+        modelInfo["history_sep"] = ""
     if modelInfo["model_type"] == "qwen":
         if modelInfo["chat_format"] == "chatml":
             modelInfo["im_end_id"] = tokenizer.im_end_id
diff --git a/tools/scripts/baichuan2_2flm.py b/tools/scripts/baichuan2_2flm.py
new file mode 100644
index 00000000..ff3b2fed
--- /dev/null
+++ b/tools/scripts/baichuan2_2flm.py
@@ -0,0 +1,24 @@
+import sys
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation.utils import GenerationConfig
+from fastllm_pytools import torch2flm
+
+if __name__ == "__main__":
+    modelpath = "baichuan-inc/Baichuan2-7B-Chat"
+    tokenizer = AutoTokenizer.from_pretrained(modelpath, use_fast=False, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(modelpath, device_map="auto", torch_dtype=torch.float32, trust_remote_code=True)
+
+    # normalize lm_head (Baichuan2's NormHead bakes L2 normalization into the output weight)
+    state_dict = model.state_dict()
+    state_dict['lm_head.weight'] = torch.nn.functional.normalize(state_dict['lm_head.weight'])
+    model.load_state_dict(state_dict)
+
+    try:
+        model.generation_config = GenerationConfig.from_pretrained(modelpath)
+    except:
+        pass
+
+    dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"
+    exportPath = sys.argv[1] if len(sys.argv) >= 2 else "baichuan2-7b-" + dtype + ".flm"
+    torch2flm.tofile(exportPath, model.to('cpu'), tokenizer, dtype=dtype)
\ No newline at end of file
diff --git a/tools/scripts/baichuan2flm.py b/tools/scripts/baichuan2flm.py
index d282cfca..bcfea7c3 100644
--- a/tools/scripts/baichuan2flm.py
+++ b/tools/scripts/baichuan2flm.py
@@ -14,5 +14,5 @@
     except:
         pass
     dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"
-    exportPath = sys.argv[1] if len(sys.argv) >= 2 else "baichuan-13b-' + dtype + '.flm"
+    exportPath = sys.argv[1] if len(sys.argv) >= 2 else "baichuan-13b-" + dtype + ".flm"
     torch2flm.tofile(exportPath, model, tokenizer, dtype = dtype)
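
A note on the `# normalize lm_head` step in the new `tools/scripts/baichuan2_2flm.py`: Baichuan2 replaces the usual linear output layer with a `NormHead` that L2-normalizes its weight rows when computing logits, so the script bakes that normalization into the checkpoint, presumably so the exported `.flm` model can use a standard linear head. A minimal sketch of why the two forms give identical logits (toy tensor sizes for illustration, not the real model's):

``` python
import torch
import torch.nn.functional as F

# Toy sizes; Baichuan2-7B's actual head is (125696, 4096).
hidden = torch.randn(1, 64)      # final hidden state (batch, hidden_size)
weight = torch.randn(1000, 64)   # NormHead weight (vocab_size, hidden_size)

# Baichuan2's NormHead L2-normalizes each weight row on the fly:
logits_normhead = F.linear(hidden, F.normalize(weight))

# Baking the normalization into the stored weight, as the export script
# does, lets a plain linear lm_head reproduce the same logits:
baked_weight = F.normalize(weight)
logits_plain = F.linear(hidden, baked_weight)

assert torch.allclose(logits_normhead, logits_plain)
```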