From 6f7dabab441566078446ef868e573cd309fe62be Mon Sep 17 00:00:00 2001 From: Igor Pissolati Date: Mon, 7 Aug 2023 17:31:13 -0300 Subject: [PATCH] Add simple test for special tokens --- tests/test-tokenizer-0.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp index 87fde16453d25..c7aeb31a5689e 100644 --- a/tests/test-tokenizer-0.cpp +++ b/tests/test-tokenizer-0.cpp @@ -14,6 +14,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() { " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, }, { "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, }, { "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, }, + { "<🦙>test extra_id_1 test", { 1, 32003, 1688, 29871, 32001, 259, 1243, }, }, + { "<🦙>test extra_id_100 test", { 1, 32003, 1688, 29871, 32002, 1243, }, }, }; return _k_tests; }; @@ -46,6 +48,10 @@ int main(int argc, char **argv) { return 1; } + llama_add_special_token(model, "extra_id_1", 32001); + llama_add_special_token(model, "extra_id_100", 32002); + llama_add_special_token(model, "<🦙>", 32003); + ctx = llama_new_context_with_model(model, lparams); if (ctx == NULL) {