Skip to content

Commit

Permalink
修复Win32Demo CPU构建错误
Browse files (browse the repository at this point in the history)
  • Loading branch information
TylunasLi committed Mar 19, 2024
1 parent 97db288 commit 1eb74ed
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
3 changes: 1 addition & 2 deletions example/Win32Demo/fastllm.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@
<AdditionalOptions>/arch:AVX /source-charset:utf-8 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>cudart.lib;cublas.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
Expand All @@ -181,7 +180,7 @@
<ClInclude Include="..\..\include\models\factoryllm.h" />
<ClInclude Include="..\..\include\models\glm.h" />
<ClInclude Include="..\..\include\models\llama.h" />
<ClCompile Include="..\..\include\models\minicpm.h" />
<ClInclude Include="..\..\include\models\minicpm.h" />
<ClInclude Include="..\..\include\models\moss.h" />
<ClInclude Include="..\..\include\models\qwen.h" />
<ClInclude Include="..\..\include\utils\armMath.h" />
Expand Down
21 changes: 13 additions & 8 deletions include/models/basellm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#pragma once

#ifndef FASTLLM_BASELLM_H
#define FASTLLM_BASELLM_H

#include "fastllm.h"

#include <thread>
Expand Down Expand Up @@ -50,9 +53,9 @@ namespace fastllm {
this->weight.ReleaseWeight();
};

virtual void LoadFromFile(const std::string &fileName); // 从文件读取
virtual void LoadFromFile(const std::string &fileName); // 从文件读取

virtual void InitParams(); // 初始化参数信息
virtual void InitParams(); // 初始化参数信息

// 推理
virtual int Forward(
Expand Down Expand Up @@ -85,12 +88,12 @@ namespace fastllm {
const LastTokensManager &lastTokens = LastTokensManager(),
std::vector <std::vector <float>*> *logits = nullptr);

// 根据输入的tokens生成LLM推理的输入
// 根据输入的tokens生成LLM推理的输入
virtual void FillLLMInputs(std::vector <std::vector <float> > &inputTokens,
const std::map <std::string, int> &params,
Data &inputIds, Data &attentionMask, Data &positionIds);

// 根据输入的tokens生成LLM推理的输入
// 根据输入的tokens生成LLM推理的输入
virtual void FillLLMInputsBatch(std::vector <std::vector <float> > &inputTokens,
const std::vector <std::map <std::string, int> > &params,
Data &inputIds, Data &attentionMask, Data &positionIds);
Expand All @@ -102,16 +105,16 @@ namespace fastllm {
virtual void ResponseBatch(const std::vector<std::string> &inputs,
std::vector<std::string> &outputs,
RuntimeResultBatch retCb = nullptr,
const GenerationConfig &generationConfig = GenerationConfig()); // 批量根据给出的内容回复
const GenerationConfig &generationConfig = GenerationConfig()); // 批量根据给出的内容回复

virtual int LaunchResponseTokens(const std::vector <int> &inputTokens,
const GenerationConfig &generationConfig = GenerationConfig()); // 启动一个response任务,返回分配的handleId

virtual int FetchResponseTokens(int handleId); // 获取指定handle的输出, -1代表输出结束了
virtual int FetchResponseTokens(int handleId); // 获取指定handle的输出, -1代表输出结束了

virtual int FetchResponseLogits(int handleId, std::vector <float> &logits); // 获取指定handle的输出Logits

virtual void SaveLowBitModel(const std::string &fileName, int bit); // 存储成量化模型
virtual void SaveLowBitModel(const std::string &fileName, int bit); // 存储成量化模型

virtual void SaveModel(const std::string &fileName); // 直接导出

Expand Down Expand Up @@ -158,3 +161,5 @@ namespace fastllm {
int tokensLimit = -1;
};
}

#endif //FASTLLM_BASELLM_H

0 comments on commit 1eb74ed

Please sign in to comment.