Fix internLM2 conversion and inference issues
cgli committed Mar 28, 2024
1 parent ef0b7a6 commit 9affcbb
Showing 4 changed files with 31 additions and 14 deletions.
CMakeLists.txt (8 changes: 5 additions & 3 deletions)

@@ -25,15 +25,14 @@ set(CMAKE_BUILD_TYPE "Release")
 if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread --std=c++17 -O2")
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-    set(CMAKE_CXX_FLAGS_DEBUG "/MTd /Zi /Ob0 /Od /RTC1")
-    set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Ob1 /Gy /DNDEBUG")
+    string(REPLACE "/Ob2" "/Ob1 /Gy" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX /std:c++17 /arch:AVX2 /source-charset:utf-8")
 else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread --std=c++17 -O2 -march=native")
 endif()
 
 
-message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
 set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp
         src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp
         src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp
@@ -70,6 +69,9 @@ if (USE_IVCOREX)
 endif()
 
 if (PY_API)
+    if(POLICY CMP0148)
+        cmake_policy(SET CMP0148 NEW)
+    endif()
     set(PYBIND third_party/pybind11)
     add_subdirectory(${PYBIND})
     add_compile_definitions(PY_API)
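For context: policy CMP0148 (new in CMake 3.27) covers the deprecated FindPythonInterp and FindPythonLibs modules. Opting in to NEW presumably steers the bundled pybind11 toward the modern FindPython discovery path and silences the deprecation warning, while the if(POLICY ...) guard lets older CMake releases, which predate the policy, configure as before.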
src/models/internlm2.cpp (3 changes: 3 additions & 0 deletions)

@@ -260,6 +260,9 @@ namespace fastllm {
             Split(qkv, -2, 0, qdim, q);
             Split(qkv, -2, qdim, qdim + 1, k);
             Split(qkv, -2, qdim + 1, qdim + 2, v);
+            q.Reshape({bsz, -1, embed_dim});
+            k.Reshape({bsz, -1, head_dim * num_key_value_heads});
+            v.Reshape({bsz, -1, head_dim * num_key_value_heads});
         } else {
             Data qBias = (weight.weight.find(qBiasName) != weight.weight.end()) ? weight[qBiasName] : Data();
             Data kBias = (weight.weight.find(kBiasName) != weight.weight.end()) ? weight[kBiasName] : Data();
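The three added Reshape calls are the substance of the inference fix: InternLM2's fused wqkv projection groups each key/value head with its qdim query heads, so after the Split calls q, k and v still carry that per-group axis. A minimal numpy sketch of the shapes involved (head counts and the exact qkv layout here are illustrative assumptions, not read from the diff):

import numpy as np

bsz, seqlen = 2, 5
num_key_value_heads, q_per_kv, head_dim = 8, 2, 128      # assumed config values
embed_dim = num_key_value_heads * q_per_kv * head_dim    # query hidden size

# Fused projection output: one (q_per_kv + 2)-slot group per KV head.
qkv = np.zeros((bsz, seqlen, num_key_value_heads, q_per_kv + 2, head_dim))

q = qkv[:, :, :, :q_per_kv, :]               # Split(qkv, -2, 0, qdim, q)
k = qkv[:, :, :, q_per_kv:q_per_kv + 1, :]   # Split(qkv, -2, qdim, qdim + 1, k)
v = qkv[:, :, :, q_per_kv + 1:, :]           # Split(qkv, -2, qdim + 1, qdim + 2, v)

# The fix: collapse the per-group axes back into flat hidden dimensions
# before the attention kernels consume the tensors.
q = q.reshape(bsz, -1, embed_dim)                         # (2, 5, 2048)
k = k.reshape(bsz, -1, head_dim * num_key_value_heads)    # (2, 5, 1024)
v = v.reshape(bsz, -1, head_dim * num_key_value_heads)    # (2, 5, 1024)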
tools/fastllm_pytools/hf_model.py (18 changes: 12 additions & 6 deletions)

@@ -75,6 +75,8 @@ def create(model,
         if isinstance(rope_scaling, builtins.dict):
             modelInfo["rope_scaling.type"] = rope_scaling["type"]
             modelInfo["rope_theta"] = rope_scaling["base"]
+    elif (modelInfo["model_type"] == "internlm2"):
+        modelInfo["eos_token_id"] = "92542"
     if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "build_chat_input")):
         # chatglm3
         modelInfo["pre_prompt"] = "";
@@ -110,6 +112,9 @@ def create(model,
     if hasattr(tokenizer, "_tokenizer") and hasattr(tokenizer._tokenizer, "decoder") \
             and isinstance(tokenizer._tokenizer.decoder, ByteLevel):
         modelInfo["tokenizer_byte_as_char"] = True
+    else:
+        if hasattr(tokenizer, "byte_encoder") and hasattr(tokenizer, "byte_decoder"):
+            modelInfo["tokenizer_byte_as_char"] = True
 
     peft_config = {}
     active_adapter = ""
@@ -154,10 +159,11 @@ def create(model,
                 i, ctypes.c_float(tokenizer.sp_model.get_score(i)));
     else:
         merges = {}
-        if (modelInfo["model_type"] == "moss"):
+        if hasattr(tokenizer, "bpe_ranks"):
             merges = {("".join(bpe_tokens), token_index) for bpe_tokens, token_index in sorted(tokenizer.bpe_ranks.items(), key=lambda kv: kv[1])}
         elif isinstance(tokenizer, PreTrainedTokenizerFast):
-            tokenizer_file = tokenizer.name_or_path + tokenizer.vocab_files_names['tokenizer_file']
+            tokenizer_file_name = tokenizer.vocab_file if hasattr(tokenizer, "vocab_file") else tokenizer.vocab_files_names['tokenizer_file']
+            tokenizer_file = tokenizer.name_or_path + tokenizer_file_name
             if os.path.exists(tokenizer_file):
                 with open(tokenizer_file, "r", encoding='utf-8') as f:
                     bpe_merges = json.load(f)["model"]["merges"]
@@ -166,10 +172,10 @@ def create(model,
         vocab = tokenizer.get_vocab()
         for v in vocab.keys():
             score = merges[v] if v in merges else 1.0
-            if (modelInfo["model_type"] == "moss"):
-                s = [(ord(c) if c not in tokenizer.byte_decoder else tokenizer.byte_decoder[c]) for c in v]
-                llm.fastllm_lib.add_tokenizer_word_llm_model(model_handle, s, vocab[v], ctypes.c_float(score));
-            elif (modelInfo["model_type"] == "qwen"):
+            # if (modelInfo["model_type"] == "moss"):
+            #     s = [(ord(c) if c not in tokenizer.byte_decoder else tokenizer.byte_decoder[c]) for c in v]
+            #     llm.fastllm_lib.add_tokenizer_word_llm_model(model_handle, s, vocab[v], ctypes.c_float(score));
+            if (modelInfo["model_type"] == "qwen"):
                 llm.fastllm_lib.add_tokenizer_word_llm_model(model_handle, v, vocab[v], ctypes.c_float(1.0));
             else:
                 llm.fastllm_lib.add_tokenizer_word_llm_model(model_handle, v.encode(), vocab[v], ctypes.c_float(score));
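Two things worth noting in the merges handling above. First, the trigger is now hasattr(tokenizer, "bpe_ranks") rather than the MOSS model type, so any slow GPT-2-style BPE tokenizer contributes its merge ranks as scores. Second, the comprehension as committed builds a set of (token, rank) tuples, so the later string lookup "v in merges" can never match; the dict form it appears to intend would look like this (a sketch with toy data, not the committed code):

# Toy bpe_ranks in the shape a slow GPT-2-style tokenizer exposes (assumption).
bpe_ranks = {("lo", "w"): 0, ("lo", "we"): 1, ("e", "r"): 2}

# token string -> merge rank; a lower rank means an earlier, higher-priority merge.
merges = {"".join(pair): rank
          for pair, rank in sorted(bpe_ranks.items(), key=lambda kv: kv[1])}

score = merges["low"] if "low" in merges else 1.0
print(score)  # 0: would feed into add_tokenizer_word_llm_model as the score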
tools/fastllm_pytools/torch2flm.py (16 changes: 11 additions & 5 deletions)

@@ -156,6 +156,8 @@ def tofile(exportPath,
         if isinstance(rope_scaling, builtins.dict):
             modelInfo["rope_scaling.type"] = rope_scaling["type"]
             modelInfo["rope_theta"] = rope_scaling["base"]
+    elif (modelInfo["model_type"] == "internlm2"):
+        modelInfo["eos_token_id"] = "92542"
     if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "build_chat_input")):
         # chatglm3
         modelInfo["pre_prompt"] = "";
@@ -192,6 +194,9 @@ def tofile(exportPath,
     if hasattr(tokenizer, "_tokenizer") and hasattr(tokenizer._tokenizer, "decoder") \
             and isinstance(tokenizer._tokenizer.decoder, ByteLevel):
         modelInfo["tokenizer_byte_as_char"] = True
+    else:
+        if hasattr(tokenizer, "byte_encoder") and hasattr(tokenizer, "byte_decoder"):
+            modelInfo["tokenizer_byte_as_char"] = True
 
     if hasattr(model, "peft_config"):
         adapter_size = len(model.peft_config)
@@ -230,10 +235,11 @@ def tofile(exportPath,
                 fo.write(struct.pack('f', float(tokenizer.sp_model.get_score(i))))
     else:
         merges = {}
-        if (modelInfo["model_type"] == "moss"):
+        if hasattr(tokenizer, "bpe_ranks"):
             merges = {("".join(bpe_tokens), token_index) for bpe_tokens, token_index in sorted(tokenizer.bpe_ranks.items(), key=lambda kv: kv[1])}
         elif isinstance(tokenizer, PreTrainedTokenizerFast):
-            tokenizer_file = tokenizer.name_or_path + tokenizer.vocab_files_names['tokenizer_file']
+            tokenizer_file_name = tokenizer.vocab_file if hasattr(tokenizer, "vocab_file") else tokenizer.vocab_files_names['tokenizer_file']
+            tokenizer_file = tokenizer.name_or_path + tokenizer_file_name
             if os.path.exists(tokenizer_file):
                 with open(tokenizer_file, "r", encoding='utf-8') as f:
                     bpe_merges = json.load(f)["model"]["merges"]
@@ -243,9 +249,9 @@ def tofile(exportPath,
         fo.write(struct.pack('i', len(vocab)))
         for v in vocab.keys():
             score = merges[v] if v in merges else 1.0
-            if (modelInfo["model_type"] == "moss"):
-                s = [(ord(c) if c not in tokenizer.byte_decoder else tokenizer.byte_decoder[c]) for c in v]
-            elif (modelInfo["model_type"] == "qwen"):
+            # if (modelInfo["model_type"] == "moss"):
+            #     s = [(ord(c) if c not in tokenizer.byte_decoder else tokenizer.byte_decoder[c]) for c in v]
+            if (modelInfo["model_type"] == "qwen"):
                 s = v
             else:
                 s = v.encode()
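torch2flm.py mirrors the hf_model.py changes for the offline exporter, so with this patch an InternLM2 chat model converts along the project's usual tofile pattern. A hedged usage sketch (checkpoint id, output path and dtype are placeholders, not from this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer
from fastllm_pytools import torch2flm

path = "internlm/internlm2-chat-7b"   # example checkpoint, not from this commit
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True).eval()

# tofile(exportPath, model, tokenizer, dtype=...) per the repository's usage docs.
torch2flm.tofile("internlm2-chat-7b.flm", model, tokenizer, dtype="float16")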
