From 8c7eb27c5345280c67c2d0890560f4f2b0d75291 Mon Sep 17 00:00:00 2001 From: cgli Date: Sun, 1 Dec 2024 21:45:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E7=9B=B4=E6=8E=A5=E5=8A=A0?= =?UTF-8?q?=E8=BD=BDQwen=EF=BC=88=E4=B8=80=E4=BB=A3=EF=BC=89=E7=9A=84HF?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/models.md | 8 ++++---- src/model.cpp | 14 +++++++++++++- src/models/qwen.cpp | 5 +++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/docs/models.md b/docs/models.md index 17929b4..8860da7 100644 --- a/docs/models.md +++ b/docs/models.md @@ -50,10 +50,10 @@ | 模型 | 加载后转换 | 离线转换 | 直接读取 | |-------------------: |------------|------------|------------| -| Qwen/Qwen-7B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | | -| Qwen/Qwen-14B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | | -| Qwen/Qwen-72B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | | -| Qwen/Qwen-1_8B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | | +| Qwen/Qwen-7B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ | +| Qwen/Qwen-14B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ | +| Qwen/Qwen-72B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ | +| Qwen/Qwen-1_8B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ | | Qwen/Qwen1.5-0.5B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔3 | | Qwen/Qwen1.5-1.8B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔3 | | Qwen/Qwen1.5-4B-Chat | [✔](#其它模型) | [✔](#qwen模型导出) | ✔3 | diff --git a/src/model.cpp b/src/model.cpp index 2314ba7..c118cce 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -524,6 +524,18 @@ namespace fastllm { model->history_sep = ""; model->weight.tokenizer.type = Tokenizer::TokenizerType::QWEN; model->weight.tokenizer.chatTemplate = ""; + } else if (tokenizerClass == "QWenTokenizer") { + // Qwen用的分词 + std::vector<std::string> lines, line; + SplitString(ReadAllFile(path + "qwen.tiktoken"), {'\n'}, lines); + for (int i = 0; i < lines.size(); i++) { + SplitString(lines[i], {' '}, line); + model->weight.AddTokenizerWord(Base64Decode(line[0]), atoi(line[1].c_str()), 1.0f); + } + model->weight.tokenizer.type = Tokenizer::TokenizerType::QWEN; + model->weight.tokenizer.chatTemplate = ""; + model->weight.dicts["im_end_id"] = std::to_string(lines.size() + 1); + model->weight.dicts["im_start_id"] = std::to_string(lines.size() + 2); } else { ErrorInFastLLM("Unsupport tokenizer_class: " + tokenizerClass); } @@ -637,7 +649,7 @@ namespace fastllm { for (auto &it : generation_config.object_items()) { if ("eos_token_id" == it.first && it.second.type() == json11::Json::ARRAY) continue; - model->weight.AddDict(it.first, it.second.dump().c_str()); + model->weight.AddDict(it.first, it.second.is_string() ? it.second.string_value() : it.second.dump()); } // 更新eos_token_id if (generation_config["eos_token_id"].is_array()) { diff --git a/src/models/qwen.cpp b/src/models/qwen.cpp index 1c1ddfa..abad7c3 100644 --- a/src/models/qwen.cpp +++ b/src/models/qwen.cpp @@ -56,6 +56,11 @@ namespace fastllm { } weight.embeddingNames.insert("transformer.wte.weight"); + weight.linearNames = { + "lm_head.weight", "transformer.h.*.ln_1.weight", "transformer.h.*.attn.c_attn.weight", + "transformer.h.*.attn.c_proj.weight", "transformer.h.*.ln_2.weight", + "transformer.h.*.mlp.w1.weight", "transformer.h.*.mlp.w2.weight", "transformer.h.*.mlp.c_proj.weight" + }; } int QWenModel::Forward(const Data &inputIds,