Merge pull request #18 from l3utterfly/merge
merged from upstream
l3utterfly authored May 3, 2024
2 parents da1a628 + c93b977 commit 3ced4ff
Showing 15 changed files with 526 additions and 177 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/close-issue.yml
@@ -12,7 +12,7 @@ jobs:
    steps:
      - uses: actions/stale@v5
        with:
-         exempt-issue-labels: "refactor,help wanted,good first issue,research"
+         exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
8 changes: 4 additions & 4 deletions ci/run.sh
@@ -518,10 +518,10 @@ function gg_run_open_llama_7b_v2 {

(time ./bin/imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log

- (time ./bin/save-load-state --model -ngl 10 ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/save-load-state --model -fa -ngl 10 ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/save-load-state --model -ngl 99 ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/save-load-state --model -fa -ngl 99 ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log

function check_ppl {
qnt="$1"
2 changes: 1 addition & 1 deletion common/common.h
@@ -135,7 +135,7 @@ struct gpt_params {
bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed

- bool kl_divergence = false; // compute KL-divergence
+ bool kl_divergence = false; // compute KL divergence

bool random_prompt = false; // do not randomize prompt if none provided
bool use_color = false; // use color to distinguish generations and inputs
4 changes: 2 additions & 2 deletions common/log.h
@@ -234,7 +234,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
// INTERNAL, DO NOT USE
// USE LOG() INSTEAD
//
- #if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
+ #if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
#define LOG_IMPL(str, ...) \
do { \
if (LOG_TARGET != nullptr) \
@@ -257,7 +257,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
// INTERNAL, DO NOT USE
// USE LOG_TEE() INSTEAD
//
- #if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER)
+ #if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
#define LOG_TEE_IMPL(str, ...) \
do { \
if (LOG_TARGET != nullptr) \
16 changes: 10 additions & 6 deletions convert-hf-to-gguf-update.py
@@ -128,7 +128,7 @@ def download_file_with_auth(url, token, save_path):
print(f"chkhsh: {chkhsh}")

# print the "pre_tokenizer" content from the tokenizer.json
with open(f"models/tokenizers/{name}/tokenizer.json", "r") as f:
with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
cfg = json.load(f)
pre_tokenizer = cfg["pre_tokenizer"]
print("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
@@ -156,15 +156,19 @@ def download_file_with_auth(url, token, save_path):
src_func += "\n"
src_func += " res = None\n"
src_func += "\n"
src_func += " # NOTE: if you get an error here, you need to add the model to the if-elif chain below\n"
src_func += " # don't do this manually - use the convert-hf-to-gguf-update.py script!\n"
src_func += " # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script\n"
src_func += " # or pull the latest version of the model from Huggingface\n"
src_func += " # don't edit the hashes manually!\n"
src_func += f"{src_ifs}\n"
src_func += " if res is None:\n"
src_func += " print(\"\\n\")\n"
src_func += " print(\"**************************************************************************************\")\n"
src_func += " print(\"** WARNING: The BPE pre-tokenizer was not recognized!\")\n"
src_func += " print(\"** This means that it was not added yet or you are using an older version.\")\n"
src_func += " print(\"** Check convert-hf-to-gguf-update.py and update it accordingly.\")\n"
src_func += " print(\"** There are 2 possible reasons for this:\")\n"
src_func += " print(\"** - the model has not been added to convert-hf-to-gguf-update.py yet\")\n"
src_func += " print(\"** - the pre-tokenization config has changed upstream\")\n"
src_func += " print(\"** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.\")\n"
src_func += " print(\"** ref: https://github.com/ggerganov/llama.cpp/pull/6920\")\n"
src_func += " print(\"**\")\n"
src_func += " print(f\"** chkhsh: {chkhsh}\")\n"
src_func += " print(\"**************************************************************************************\")\n"
@@ -249,7 +253,7 @@ def download_file_with_auth(url, token, save_path):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")

with open(f"models/ggml-vocab-{name}.gguf.inp", "w") as f:
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
for text in tests:
f.write(f"{text}")
f.write("\n__ggml_vocab_test__\n")
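For context on the hunks above: the chkhsh value the script prints is a fingerprint of the tokenizer's behaviour, and the added encoding="utf-8" arguments make the file I/O independent of the platform's default locale. Below is a minimal sketch of both ideas, assuming a transformers-style tokenizer; the probe text and helper names are illustrative, not the script's exact ones.

import hashlib
import json

from transformers import AutoTokenizer  # assumed dependency, as in the convert scripts


def tokenizer_fingerprint(model_dir: str, probe_text: str) -> str:
    # Encode a fixed probe string and hash the resulting token ids: two models that
    # pre-tokenize identically produce the same digest, so the digest can stand in
    # for "which BPE pre-tokenizer is this?".
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    token_ids = tokenizer.encode(probe_text)
    return hashlib.sha256(str(token_ids).encode("utf-8")).hexdigest()


def read_pre_tokenizer(model_dir: str):
    # Explicit encoding, as in the diff above, so tokenizer.json parses the same way
    # on Windows (locale-dependent default codec) and on Linux/macOS (usually UTF-8).
    with open(f"{model_dir}/tokenizer.json", "r", encoding="utf-8") as f:
        return json.load(f).get("pre_tokenizer")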
14 changes: 9 additions & 5 deletions convert-hf-to-gguf.py
@@ -279,8 +279,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:

res = None

- # NOTE: if you get an error here, you need to add the model to the if-elif chain below
- # don't do this manually - use the convert-hf-to-gguf-update.py script!
+ # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
+ # or pull the latest version of the model from Huggingface
+ # don't edit the hashes manually!
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
res = "llama-bpe"
@@ -310,8 +311,11 @@ def get_vocab_base_pre(self, tokenizer) -> str:
print("\n")
print("**************************************************************************************")
print("** WARNING: The BPE pre-tokenizer was not recognized!")
print("** This means that it was not added yet or you are using an older version.")
print("** Check convert-hf-to-gguf-update.py and update it accordingly.")
print("** There are 2 possible reasons for this:")
print("** - the model has not been added to convert-hf-to-gguf-update.py yet")
print("** - the pre-tokenization config has changed upstream")
print("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
print("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
print("**")
print(f"** chkhsh: {chkhsh}")
print("**************************************************************************************")
@@ -1423,7 +1427,7 @@ def write_tensors(self):
experts = dict()
for name, data_torch in self.get_tensors():
# we don't need these
- if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+ if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
continue

old_dtype = data_torch.dtype
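The reworded warning above is the fallback branch of a hash lookup: get_vocab_base_pre computes the tokenizer fingerprint and walks a generated if-elif chain of known values. A rough sketch of that pattern follows, using the one hash/name pair visible in the diff (Meta-Llama-3-8B -> "llama-bpe") and a hypothetical resolve_pre_tokenizer helper; the real chain is generated by convert-hf-to-gguf-update.py and is much longer.

# Known fingerprints; the single entry below is the one shown in the diff above.
KNOWN_PRE_TOKENIZERS = {
    "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": "llama-bpe",  # Meta-Llama-3-8B
}


def resolve_pre_tokenizer(chkhsh: str) -> str:
    res = KNOWN_PRE_TOKENIZERS.get(chkhsh)
    if res is None:
        # Mirrors the two cases the new warning text spells out: the model has not
        # been added to convert-hf-to-gguf-update.py yet, or its pre-tokenization
        # config changed upstream and the stored hash no longer matches.
        raise NotImplementedError(
            f"BPE pre-tokenizer was not recognized (chkhsh: {chkhsh}); "
            "update convert-hf-to-gguf-update.py or re-download the model files"
        )
    return res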
2 changes: 1 addition & 1 deletion examples/main/main.cpp
@@ -546,7 +546,7 @@ int main(int argc, char ** argv) {
// if we run out of context:
// - take the n_keep first tokens from the original prompt (via n_past)
// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
- if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) > n_ctx) {
+ if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) >= n_ctx) {
if (params.n_predict == -2) {
LOG_TEE("\n\n%s: context full and n_predict == -%d => stopping\n", __func__, params.n_predict);
break;
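One way to read the > to >= change, under the comment's own description of the recovery path: with, say, n_ctx = 512, n_keep = 0, guidance_offset = 0, n_past = 510 and a 2-token batch, the old test (510 + 2 > 512) did not fire, so the batch would be decoded into an exactly full context with no room left for the next token; the new test (510 + 2 >= 512) triggers the handling one step earlier, keeping the first n_keep tokens and recomputing over half of the last (n_ctx - n_keep) tokens before decoding continues.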