Commit

Optimize and merge the conversion scripts
TylunasLi committed Feb 28, 2024
1 parent 7c37b42 commit f3af399
Showing 6 changed files with 50 additions and 31 deletions.
13 changes: 12 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ fastllm is a high-performance large language model inference library implemented in pure C++ with no third-party dependencies
- 🚀 Supports streaming output, making it easy to implement a typewriter effect
- 🚀 Supports calling from Python
- 🚀 Front-end/back-end separation design, making it easy to support new compute devices
- 🚀 Currently supports ChatGLM models, various LLAMA models (ALPACA, VICUNA, etc.), BAICHUAN models, and MOSS models
- 🚀 Currently supports ChatGLM series models, various LLAMA models (ALPACA, VICUNA, etc.), BAICHUAN models, QWEN models, MOSS models, and more

## Two lines of code for acceleration (in testing; currently only supports the chatglm series)

@@ -379,6 +379,17 @@ python3 tools/qwen2flm.py qwen-7b-int8.flm int8 # export the int8 model
python3 tools/qwen2flm.py qwen-7b-int4.flm int4 # export the int4 model
```

* **Qwen1.5**

```sh
# Install the Qwen2 environment first (transformers >= 4.37.0)
# Export the model at the precision you need
python3 tools/llamalike2flm.py qwen1.5-4b-fp16.flm float16 "qwen/Qwen1.5-4B-Chat" # export the Qwen1.5-4B-Chat float16 model
python3 tools/llamalike2flm.py qwen1.5-7b-int8.flm int8 "qwen/Qwen1.5-7B-Chat" # export the Qwen1.5-7B-Chat int8 model
python3 tools/llamalike2flm.py qwen1.5-14b-int4.flm int4 "qwen/Qwen1.5-14B-Chat" # export the Qwen1.5-14B-Chat int4 model
# The last argument can be replaced with a local model path
```
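
Once exported, the .flm file can be loaded through the project's Python bindings. A minimal sketch, assuming the int8 file name from the commands above and the `llm.model` / `response` / `stream_response` API shown elsewhere in the README:

```python
# Minimal sketch: load an exported .flm model with the fastllm Python bindings.
# The file name is the one produced by the int8 export command above.
from fastllm_pytools import llm

model = llm.model("qwen1.5-7b-int8.flm")        # load the quantized model
print(model.response("Hello!"))                 # one-shot reply
for piece in model.stream_response("Hello!"):   # streaming (typewriter-style) output
    print(piece, end = "", flush = True)
```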

## Development plan

In other words, the part commonly known as pie in the sky. If there is a feature you need, feel free to raise it in the Discussions area
7 changes: 7 additions & 0 deletions tools/fastllm_pytools/hf_model.py
@@ -60,6 +60,13 @@ def create(model,
modelInfo["im_start_id"] = tokenizer.im_start_id
elif (modelInfo["model_type"] == "qwen2"):
modelInfo["eos_token_id"] = "151645"
elif (modelInfo["model_type"] == "internlm"):
modelInfo["eos_token_id"] = "103028"
if "rotary" in modelInfo:
rope_scaling = modelInfo.pop("rotary")
if isinstance(rope_scaling, builtins.dict):
modelInfo["rope_scaling.type"] = rope_scaling["type"]
modelInfo["rope_theta"] = rope_scaling["base"]
if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "build_chat_input")):
# chatglm3
modelInfo["pre_prompt"] = "";
7 changes: 7 additions & 0 deletions tools/fastllm_pytools/torch2flm.py
@@ -118,6 +118,13 @@ def tofile(exportPath,
modelInfo["im_start_id"] = tokenizer.im_start_id
elif (modelInfo["model_type"] == "qwen2"):
modelInfo["eos_token_id"] = "151645"
elif (modelInfo["model_type"] == "internlm"):
modelInfo["eos_token_id"] = "103028"
if "rotary" in modelInfo:
rope_scaling = modelInfo.pop("rotary")
if isinstance(rope_scaling, builtins.dict):
modelInfo["rope_scaling.type"] = rope_scaling["type"]
modelInfo["rope_theta"] = rope_scaling["base"]
if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "build_chat_input")):
# chatglm3
modelInfo["pre_prompt"] = "";
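
Both hf_model.py (above) and torch2flm.py gain the same new block: when the config carries a nested "rotary" entry, as InternLM configs do, it is flattened into flat modelInfo keys. A standalone sketch of that flattening; the example value {"type": "dynamic", "base": 10000} is an assumption modelled on InternLM-style configs, not taken from the commit:

```python
# Standalone sketch of the "rotary" flattening added in this commit.
import builtins

modelInfo = {"model_type": "internlm", "rotary": {"type": "dynamic", "base": 10000}}
if "rotary" in modelInfo:
    rope_scaling = modelInfo.pop("rotary")
    if isinstance(rope_scaling, builtins.dict):
        modelInfo["rope_scaling.type"] = rope_scaling["type"]  # -> "dynamic"
        modelInfo["rope_theta"] = rope_scaling["base"]         # -> 10000

print(modelInfo)
# {'model_type': 'internlm', 'rope_scaling.type': 'dynamic', 'rope_theta': 10000}
```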
16 changes: 0 additions & 16 deletions tools/scripts/internlm2flm.py

This file was deleted.

24 changes: 24 additions & 0 deletions tools/scripts/llamalike2flm.py
@@ -0,0 +1,24 @@
import sys
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastllm_pytools import torch2flm

if __name__ == "__main__":
    modelNameOrPath = sys.argv[3] if len(sys.argv) >= 4 else 'qwen/Qwen1.5-7B-Chat'
    tokenizer = AutoTokenizer.from_pretrained(modelNameOrPath, trust_remote_code=True)
    # `torch_dtype=torch.float16` is set by default; if it will not cause an OOM error, you can load the model in float32.
    model = AutoModelForCausalLM.from_pretrained(modelNameOrPath, trust_remote_code=True, torch_dtype=torch.float16)
    model = model.eval()
    dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"
    exportPath = sys.argv[1] if len(sys.argv) >= 2 else model.config.model_type + "-7b-" + dtype + ".flm"
    if model.config.model_type == "internlm":
        torch2flm.tofile(exportPath, model, tokenizer, pre_prompt = "<s>",
                         user_role = "<|User|>:", bot_role = "<eoh>\n<|Bot|>:",
                         history_sep = "<eoa>\n<s>", dtype = dtype)
    elif model.config.model_type == "qwen2":
        torch2flm.tofile(exportPath, model, tokenizer, pre_prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>", user_role = "<|im_start|>user\n",
                         bot_role = "<|im_end|><|im_start|>assistant\n", history_sep = "<|im_end|>\n", dtype = dtype)
    # add custom code here
    else:
        torch2flm.tofile(exportPath, model, tokenizer, pre_prompt = "", user_role = "",
                         bot_role = "", history_sep = "", dtype = dtype)
14 changes: 0 additions & 14 deletions tools/scripts/qwen2_2flm.py

This file was deleted.
