From d146334c1165f21ffac9a7c2f5a944f12cd085ea Mon Sep 17 00:00:00 2001
From: Billel Mokeddem
Date: Tue, 17 Dec 2024 09:46:19 +0000
Subject: [PATCH 1/7] Add Falcon3 model support

---
 convert_hf_to_gguf.py        |  6 ++++++
 convert_hf_to_gguf_update.py |  1 +
 src/llama.cpp                | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 9dc1673bc2c06..66e268af61419 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -525,6 +525,9 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
             else:
                 token: str = reverse_vocab[i]
                 if token in added_vocab:
+                    # We need to manually encode and decode the added tokens in case special characters
+                    # used for `\n` / `\t` have been manually added in the added tokens
+                    token = tokenizer.decode(tokenizer.encode(token))
                     if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
@@ -571,6 +574,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
+        if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
+            # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
+            res = "falcon3"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 88058442f6dc4..2ba346640b352 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -72,6 +72,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge",       "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
     {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
diff --git a/src/llama.cpp b/src/llama.cpp
index 8b799e0ebeda7..1cc8a93323b4a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1612,6 +1612,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
     LLM_CHAT_TEMPLATE_PHI_3,
+    LLM_CHAT_TEMPLATE_FALCON_3,
     LLM_CHAT_TEMPLATE_ZEPHYR,
     LLM_CHAT_TEMPLATE_MONARCH,
     LLM_CHAT_TEMPLATE_GEMMA,
@@ -1644,6 +1645,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7 },
     { "phi3",              LLM_CHAT_TEMPLATE_PHI_3 },
+    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3 },
     { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR },
     { "monarch",           LLM_CHAT_TEMPLATE_MONARCH },
     { "gemma",             LLM_CHAT_TEMPLATE_GEMMA },
@@ -6473,6 +6475,11 @@ static void llm_load_vocab(
         } else if (
                 tokenizer_pre == "falcon") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
+        } else if (
+                tokenizer_pre == "falcon3") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
+            vocab.tokenizer_ignore_merges = true;
+            vocab.tokenizer_add_bos = true;
         } else if (
                 tokenizer_pre == "mpt") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
@@ -22219,6 +22226,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
         }
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
+    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
+        return LLM_CHAT_TEMPLATE_FALCON_3;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -22371,6 +22380,15 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
+        // Falcon 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
         // zephyr template
         for (auto message : chat) {
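
For reference, the Falcon 3 branch added to llama_chat_apply_template_internal() above wraps every turn as "<|role|>\n<content>\n" and, when add_ass is set, leaves an open "<|assistant|>\n" turn for generation to continue from. A minimal Python re-expression of that C++ loop, illustrative only and not part of the patch series:

def falcon3_chat_template(chat: list[dict[str, str]], add_ass: bool = True) -> str:
    # Mirrors the C++ loop above: each turn becomes <|role|>\n<content>\n.
    out = ""
    for message in chat:
        out += f"<|{message['role']}|>\n{message['content']}\n"
    if add_ass:
        # Open an assistant turn so the model generates from here.
        out += "<|assistant|>\n"
    return out

print(falcon3_chat_template([{"role": "user", "content": "Hello!"}]))
# <|user|>
# Hello!
# <|assistant|>
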
tokenizer_pre == "mpt") { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT; @@ -22219,6 +22226,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) { } } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { return LLM_CHAT_TEMPLATE_PHI_3; + } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { + return LLM_CHAT_TEMPLATE_FALCON_3; } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { return LLM_CHAT_TEMPLATE_ZEPHYR; } else if (tmpl_contains("bos_token + message['role']")) { @@ -22371,6 +22380,15 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "<|assistant|>\n"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { + // Falcon 3 + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>\n" << message->content << "\n"; + } + if (add_ass) { + ss << "<|assistant|>\n"; + } } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { // zephyr template for (auto message : chat) { From fc055407b7c557f8e935aa2191485b0de967e2a1 Mon Sep 17 00:00:00 2001 From: Billel Mokeddem Date: Wed, 18 Dec 2024 04:58:00 +0000 Subject: [PATCH 2/7] Add fix for adding bos to added special tokens --- convert_hf_to_gguf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 66e268af61419..77ab5ef4ae6b6 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -527,7 +527,9 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: if token in added_vocab: # We need to manually encode and decode the added tokens in case special characters # used for `\n` / `\t` have been manually added in the added tokens - token = tokenizer.decode(tokenizer.encode(token)) + if len(token) == 1: + token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) + if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token): toktypes.append(gguf.TokenType.CONTROL) else: From b3d022aa1a352b39797ae7367448759fef631084 Mon Sep 17 00:00:00 2001 From: Billel Mokeddem Date: Wed, 18 Dec 2024 05:46:07 +0000 Subject: [PATCH 3/7] Add comment explaining the logic behind the if statement --- convert_hf_to_gguf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 77ab5ef4ae6b6..1549022523f9e 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -527,6 +527,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: if token in added_vocab: # We need to manually encode and decode the added tokens in case special characters # used for `\n` / `\t` have been manually added in the added tokens + # To avoid unexpected issues - we make sure to encode single-char tokens if len(token) == 1: token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) From d8d2f370dc97f3bba3ecc1f9fc6b6853a0794028 Mon Sep 17 00:00:00 2001 From: Billel Mokeddem Date: Wed, 18 Dec 2024 07:23:35 +0000 Subject: [PATCH 4/7] Add a log message to better track the when the following line of code is triggered --- convert_hf_to_gguf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 1549022523f9e..cd5dd9435bece 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -529,6 +529,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: # used for `\n` / `\t` have been manually added in the added tokens # To avoid unexpected issues - we make sure to encode single-char tokens if len(token) == 1: + 
logger.info("Ecode-Decode special characters using AutoTokenizer") token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token): From 92e41ec4b993c75cc6cb4fa92f7d233084741bb8 Mon Sep 17 00:00:00 2001 From: Billel Mokeddem Date: Wed, 18 Dec 2024 08:20:28 +0000 Subject: [PATCH 5/7] Update log to only print when input and output characters are different --- convert_hf_to_gguf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index cd5dd9435bece..06e3016cc9e5c 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -529,8 +529,10 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: # used for `\n` / `\t` have been manually added in the added tokens # To avoid unexpected issues - we make sure to encode single-char tokens if len(token) == 1: - logger.info("Ecode-Decode special characters using AutoTokenizer") + previous_token = token token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) + if previous_token != token: + logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer") if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token): toktypes.append(gguf.TokenType.CONTROL) From a1f146dba1126c6557d9c7c8696753aba87ec5e4 Mon Sep 17 00:00:00 2001 From: Billel Mokeddem Date: Sun, 22 Dec 2024 20:12:46 +0000 Subject: [PATCH 6/7] Fix handling pre-normalized tokens --- convert_hf_to_gguf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 06e3016cc9e5c..a55bedc72cdfe 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -525,10 +525,9 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: else: token: str = reverse_vocab[i] if token in added_vocab: - # We need to manually encode and decode the added tokens in case special characters - # used for `\n` / `\t` have been manually added in the added tokens - # To avoid unexpected issues - we make sure to encode single-char tokens - if len(token) == 1: + # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized. + # To avoid unexpected issues - we make sure to normalize non-normalized tokens + if not tokenizer.added_tokens_decoder[i].normalized: previous_token = token token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) if previous_token != token: @@ -537,6 +536,8 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token): toktypes.append(gguf.TokenType.CONTROL) else: + # NOTE: this was added for Gemma. + # Encoding and decoding the tokens above isn't sufficient for this case. 
From 64d8687e22997f085ec811223b09ead34d8037bb Mon Sep 17 00:00:00 2001
From: Billel Mokeddem
Date: Sun, 22 Dec 2024 20:39:40 +0000
Subject: [PATCH 7/7] Refactoring

---
 src/llama.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 1cc8a93323b4a..00011e84255cf 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6460,7 +6460,8 @@ static void llm_load_vocab(
         } else if (
                 tokenizer_pre == "llama3"   ||
                 tokenizer_pre == "llama-v3" ||
-                tokenizer_pre == "llama-bpe") {
+                tokenizer_pre == "llama-bpe"||
+                tokenizer_pre == "falcon3") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             vocab.tokenizer_ignore_merges = true;
             vocab.tokenizer_add_bos = true;
@@ -6475,11 +6476,6 @@ static void llm_load_vocab(
         } else if (
                 tokenizer_pre == "falcon") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
-        } else if (
-                tokenizer_pre == "falcon3") {
-            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
-            vocab.tokenizer_ignore_merges = true;
-            vocab.tokenizer_add_bos = true;
         } else if (
                 tokenizer_pre == "mpt") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
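
As background on the 9d032fcb... constant added in patch 1: convert_hf_to_gguf_update.py derives each chkhsh by tokenizing a fixed probe string and hashing the resulting token IDs, roughly as in the sketch below. This is a simplified rendering; the real script uses a much longer probe text covering whitespace runs, digits, and emoji, so the hash below will not match the committed constant.

from hashlib import sha256
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-7B-Base")
chktxt = "\n \n\n \t Hello world 3.14"  # stand-in for the real probe text
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()
print(chkhsh)  # compared against the constants in get_vocab_base_pre()
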