
Commit

Make MiniCpmModel a subclass of LlamaModel; optimize the export scripts
cgli authored and TylunasLi committed Mar 4, 2024
1 parent a0f2212 commit 97db288
Showing 4 changed files with 49 additions and 591 deletions.
19 changes: 12 additions & 7 deletions README.md
@@ -370,33 +370,38 @@ python3 tools/moss_export.py moss-int4.flm int4 # export the int4 model
Conversion steps for some models are available in [the examples here](docs/llama_cookbook.md)

#### QWEN model export
* **Qwen**
```sh
# Install the Qwen environment first
# If you use your own fine-tuned model, modify the tokenizer/model creation code in qwen2flm.py
# Export the model at the precision you need
cd build
python3 tools/qwen2flm.py qwen-7b-fp16.flm float16 # export the float16 model
python3 tools/qwen2flm.py qwen-7b-int8.flm int8 # export the int8 model
python3 tools/qwen2flm.py qwen-7b-int4.flm int4 # export the int4 model
```

#### MINICPM model export
```sh
cd build
python tools/minicpm2flm.py # export the float16 model
./main -p minicpm-2b-float16.flm # run the model
```

* **Qwen1.5**

```sh
# Install the Qwen2 environment first (transformers >= 4.37.0)
# Export the model at the precision you need
cd build
python3 tools/llamalike2flm.py qwen1.5-7b-fp16.flm float16 "qwen/Qwen1.5-4B-Chat" # export the Qwen1.5-4B-Chat float16 model
python3 tools/llamalike2flm.py qwen1.5-7b-int8.flm int8 "qwen/Qwen1.5-7B-Chat" # export the Qwen1.5-7B-Chat int8 model
python3 tools/llamalike2flm.py qwen1.5-7b-int4.flm int4 "qwen/Qwen1.5-14B-Chat" # export the Qwen1.5-14B-Chat int4 model
# The last argument can also be a local model path
```

#### MINICPM model export
```sh
# Install the MiniCPM environment first (transformers >= 4.36.0)
# By default the script exports the MiniCPM-2B-dpo-fp16 model
cd build
python tools/minicpm2flm.py minicpm-2b-float16.flm # export the dpo float16 model
./main -p minicpm-2b-float16.flm # run the model
```
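If you prefer to drive the exported `.flm` file from C++ instead of `./main`, a minimal sketch follows. The `MakeInput()`/`Response()` signatures come from `basellm.h` (see the header diff below); `fastllm::CreateLLMModelFromFile` in `model.h` and the exact streaming-callback convention are assumptions to verify against your checkout.

```cpp
// Minimal sketch, not part of this commit. Assumptions: model.h provides
// fastllm::CreateLLMModelFromFile(), and RuntimeResult is a callback invoked
// with (index, content) as generation streams; check basellm.h for the exact
// index convention before relying on it.
#include <cstdio>
#include <string>
#include "model.h"

int main() {
    auto model = fastllm::CreateLLMModelFromFile("minicpm-2b-float16.flm");
    // MakeInput(history, round, input) builds the first-round prompt.
    std::string prompt = model->MakeInput("", 0, "Hello");
    model->Response(prompt, [](int index, const char *content) {
        if (index >= 0) {          // stream pieces as they are produced
            printf("%s", content);
            fflush(stdout);
        }
    });
    printf("\n");
    return 0;
}
```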

## Roadmap

In other words, the wish-list section: if there is a feature you need, please raise it in the Discussions area
23 changes: 2 additions & 21 deletions include/models/minicpm.h
@@ -6,12 +6,13 @@
#define FASTLLM_MINICPM_H

#include "basellm.h"
#include "llama.h"
#include "cmath"

#include <iostream>

namespace fastllm {
class MiniCpmModel: public basellm {
class MiniCpmModel: public LlamaModel {
public:
        MiniCpmModel(); // constructor

@@ -48,26 +49,6 @@ namespace fastllm {
const LastTokensManager &lastTokens = LastTokensManager(),
std::vector <std::vector <float>*> *logits = nullptr);

        virtual std::string Response(const std::string& input,
                                     RuntimeResult retCb,
                                     const GenerationConfig &generationConfig = GenerationConfig()); // generate a reply for the given input

        virtual void ResponseBatch(const std::vector <std::string> &inputs,
                                   std::vector <std::string> &outputs,
                                   RuntimeResultBatch retCb,
                                   const GenerationConfig &generationConfig = GenerationConfig());

        virtual int LaunchResponseTokens(const std::vector <int> &inputTokens,
                                         const GenerationConfig &generationConfig = GenerationConfig()); // start a response task and return the assigned handleId

        virtual int FetchResponseTokens(int handelId); // fetch the output for the given handle; -1 means generation has finished

        virtual void WarmUp(); // warm up the model

        virtual std::string MakeInput(const std::string &history, int round, const std::string &input); // build the prompt from the history and the current input

        virtual std::string MakeHistory(const std::string &history, int round, const std::string &input, const std::string &output); // update the history with the current reply

private:
float embed_scale = 1.f;
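
The effect of this header change: `MiniCpmModel` now inherits `LlamaModel`'s chat pipeline (`Response`, `ResponseBatch`, `LaunchResponseTokens`, `FetchResponseTokens`, `WarmUp`, `MakeInput`, `MakeHistory`) instead of redeclaring it, and keeps only what actually differs, such as `embed_scale`. A hypothetical, self-contained sketch of the pattern (illustrative stand-in classes, not the real fastllm ones):

```cpp
// Hypothetical illustration of the refactoring pattern in this commit; these are
// stand-in classes, not the real fastllm ones. The base class owns the whole
// generation pipeline once, and the derived model overrides only the step that
// differs (here, scaling the token embeddings).
#include <string>
#include <vector>

class LlamaLikeBase {
public:
    virtual ~LlamaLikeBase() = default;
    // Shared entry point: prompt building, forward pass and sampling live here.
    std::string Response(const std::string &input) {
        std::vector<float> emb = Embed(Tokenize(input));
        return Decode(emb);
    }
protected:
    virtual std::vector<int> Tokenize(const std::string &s) {
        return std::vector<int>(s.begin(), s.end());   // toy byte-level tokens
    }
    virtual std::vector<float> Embed(const std::vector<int> &ids) {
        return std::vector<float>(ids.size(), 1.0f);   // placeholder embeddings
    }
    virtual std::string Decode(const std::vector<float> &) {
        return "<reply>";                              // placeholder decoding loop
    }
};

class MiniCpmLike : public LlamaLikeBase {
protected:
    float embed_scale = 12.0f;  // illustrative value, mirrors the embed_scale member above
    std::vector<float> Embed(const std::vector<int> &ids) override {
        std::vector<float> e = LlamaLikeBase::Embed(ids);
        for (float &v : e) v *= embed_scale;           // the only MiniCPM-specific step here
        return e;
    }
};
```

With this shape, every override removed from `minicpm.h` above is simply inherited from the base class.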

