From f15f3e308e86820c2c7f86ec6cdbc94296062573 Mon Sep 17 00:00:00 2001
From: l3utterfly
Date: Thu, 24 Aug 2023 00:11:23 +0800
Subject: [PATCH] added support for eot token and unknown token (for an
 improperly tokenised layla fine-tune)

---
 examples/main/main.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index c63abaab45827..6d6907dd4905b 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -54,6 +54,15 @@ void sigint_handler(int signo) {
 }
 #endif
 
+
+llama_token llama_token_unk() {
+    return 0;
+}
+
+llama_token llama_token_eot() {
+    return 32000;
+}
+
 int main(int argc, char ** argv) {
     gpt_params params;
 
@@ -705,7 +714,7 @@ int main(int argc, char ** argv) {
         }
 
         // deal with end of text token in interactive mode
-        if (last_n_tokens.back() == llama_token_eos()) {
+        if (last_n_tokens.back() == llama_token_eos() || last_n_tokens.back() == llama_token_eot() || last_n_tokens.back() == llama_token_unk()) {
             if (params.interactive) {
                 if (params.antiprompt.size() != 0) {
                     // tokenize and inject first reverse prompt
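
Note (not part of the patch): below is a minimal standalone sketch of the same end-of-generation check, assuming the hard-coded token IDs used above (0 for the unknown token, 32000 for EOT) and a placeholder EOS ID of 2. The helper names (token_*_id, is_end_of_generation) are illustrative only and are not llama.cpp API.

#include <cstdint>
#include <cstdio>
#include <vector>

using llama_token = int32_t;

// Hard-coded IDs mirroring the patch; the EOS value here is an assumed placeholder.
static llama_token token_eos_id() { return 2;     } // assumed EOS id
static llama_token token_unk_id() { return 0;     } // unknown-token id from the patch
static llama_token token_eot_id() { return 32000; } // end-of-turn id from the patch

// Returns true when the most recently generated token signals end of generation,
// matching the widened condition introduced by the patch.
static bool is_end_of_generation(const std::vector<llama_token> & last_n_tokens) {
    if (last_n_tokens.empty()) {
        return false;
    }
    const llama_token t = last_n_tokens.back();
    return t == token_eos_id() || t == token_eot_id() || t == token_unk_id();
}

int main() {
    const std::vector<llama_token> history = { 15043, 3186, 32000 }; // ends with the EOT id
    std::printf("end of generation: %s\n", is_end_of_generation(history) ? "yes" : "no");
    return 0;
}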