Merge branch 'master' into llama
TylunasLi authored Feb 29, 2024
2 parents 4a0384f + 3728911 commit 7e1d704
Showing 6 changed files with 1,191 additions and 5 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -33,7 +33,7 @@ endif()

message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp
-                    src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp
+                    src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp src/models/minicpm.cpp
src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp src/models/glm.cpp)

include_directories(include)
71 changes: 71 additions & 0 deletions include/models/minicpm.h
@@ -0,0 +1,71 @@
//
// Created by huangyuyang on 6/1/23.
//

#ifndef FASTLLM_MINICPM_H
#define FASTLLM_MINICPM_H

#include "basellm.h"
#include "cmath"

#include <iostream>

namespace fastllm {
class MiniCpmModel: public basellm {
public:
        MiniCpmModel(); // constructor

        // inference
virtual int Forward(
const Data &inputIds,
const Data &attentionMask,
const Data &positionIds,
std::vector <std::pair <Data, Data> > &pastKeyValues,
const GenerationConfig &generationConfig = GenerationConfig(),
const LastTokensManager &lastTokens = LastTokensManager(),
std::vector <float> *logits = nullptr);

std::vector <int> ForwardBatch(
int batch,
const Data &inputIds,
const Data &attentionMask,
const Data &positionIds,
std::vector <std::pair <Data, Data> > &pastKeyValues,
const GenerationConfig &generationConfig = GenerationConfig(),
const LastTokensManager &lastTokens = LastTokensManager(),
std::vector <std::vector <float>*> *logits = nullptr);

std::vector <int> ForwardBatch(
int batch,
const Data &inputIds,
const std::vector <Data*> &attentionMask,
const std::vector <Data*> &positionIds,
const std::vector <int> &seqLens,
std::vector <std::pair <Data*, Data*> > &pastKeyValues,
const std::vector <GenerationConfig> &generationConfigs,
const LastTokensManager &lastTokens = LastTokensManager(),
std::vector <std::vector <float>*> *logits = nullptr);

virtual std::string Response(const std::string& input,
RuntimeResult retCb,
                                     const GenerationConfig &generationConfig = GenerationConfig()); // generate a reply to the given input

virtual void ResponseBatch(const std::vector <std::string> &inputs,
std::vector <std::string> &outputs,
RuntimeResultBatch retCb,
const GenerationConfig &generationConfig = GenerationConfig());

virtual int LaunchResponseTokens(const std::vector <int> &inputTokens,
                                          const GenerationConfig &generationConfig = GenerationConfig()); // launch a response task and return the assigned handleId

        virtual int FetchResponseTokens(int handleId); // fetch the next output token for the given handle; -1 means the output has finished

        virtual void WarmUp(); // warm up

        virtual std::string MakeInput(const std::string &history, int round, const std::string &input); // build the prompt from the history and the current input

        virtual std::string MakeHistory(const std::string &history, int round, const std::string &input, const std::string &output); // update the history with the current reply
};
}

#endif //FASTLLM_MINICPM_H
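
For orientation between the two new files, here is a minimal usage sketch of the synchronous API declared above. It is not part of the commit: the loader entry point CreateLLMModelFromFile, the model file name, and the output_token_limit field are assumptions about fastllm's surrounding API rather than something this diff defines.

#include "model.h"

#include <cstdio>
#include <string>

int main() {
    // Assumed loader entry point: dispatches on the model_type recorded in the
    // .flm file ("minicpm" here, see the src/model.cpp change below).
    auto model = fastllm::CreateLLMModelFromFile("minicpm-2b.flm");

    fastllm::GenerationConfig config;
    config.output_token_limit = 256; // assumed field name; caps the reply length

    // MakeInput wraps the raw question into the model's prompt template.
    std::string prompt = model->MakeInput("", 0, "Hello, who are you?");
    std::string reply = model->Response(prompt, nullptr, config);
    printf("%s\n", reply.c_str());
    return 0;
}
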
3 changes: 3 additions & 0 deletions src/model.cpp
@@ -9,6 +9,7 @@
#include "llama.h"
#include "qwen.h"
#include "glm.h"
#include "minicpm.h"

namespace fastllm {
void basellm::LoadFromFile(const std::string &fileName) {
@@ -106,6 +107,8 @@ namespace fastllm {
} else if (modelType == "qwen2") {
model = new LlamaModel();
model->model_type = "qwen";
+} else if (modelType == "minicpm") {
+    model = (basellm *) (new MiniCpmModel());
} else if (modelType == "qwen") {
model = (basellm *) (new QWenModel());
model->weight.tokenizer.type = Tokenizer::TokenizerType::QWEN;
