From 9cfe4d7202da427e5e7f65000021ca33f283b26b Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 26 Sep 2024 18:06:03 +0100
Subject: [PATCH] `tool-call`: refactor llama_chat_template class + use in
 validate_model_chat_template

---
 common/chat-template.cpp   | 58 +++++++++++++++++++++++++-------------
 common/chat-template.h     | 26 ++++-------------
 examples/server/server.cpp | 20 +++++++++++--
 3 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/common/chat-template.cpp b/common/chat-template.cpp
index 3f84a1fb53430..ed37513beb8ef 100644
--- a/common/chat-template.cpp
+++ b/common/chat-template.cpp
@@ -1,5 +1,4 @@
 #include "chat-template.h"
-#include "minja.hpp"
 #include "llama.h"
 
 using json = nlohmann::ordered_json;
@@ -31,14 +30,39 @@ static std::string llama_model_meta_val_str(const struct llama_model * model, co
     return "";
 }
 
+llama_chat_template::llama_chat_template(const std::string & chat_template, const std::string & bos_token, const std::string & eos_token)
+    : _chat_template(chat_template), _bos_token(bos_token), _eos_token(eos_token) {
+
+    _supports_tools = chat_template.find("tools") != std::string::npos;
+    _requires_object_arguments = chat_template.find("tool_call.arguments | items") != std::string::npos;
+    _supports_system_role = chat_template.find("System role not supported") == std::string::npos;
+
+    if (chat_template.find("<tool_call>") != std::string::npos) {
+        _tool_call_style = Hermes2Pro;
+    } else if (chat_template.find(">>>all") != std::string::npos) {
+        _tool_call_style = FunctionaryV3Llama3;
+    } else if (chat_template.find("<|start_header_id|>") != std::string::npos) {
+        if (chat_template.find("<|python_tag|>") != std::string::npos) {
+            _tool_call_style = Llama31;
+        }
+    }
+
+    _template_root = minja::Parser::parse(_chat_template, {
+        /* .trim_blocks = */ true,
+        /* .lstrip_blocks = */ true,
+        /* .keep_trailing_newline = */ false,
+    });
+}
+
 llama_chat_template llama_chat_template::from_model(
     const struct llama_model * model,
-    const std::string & chat_template_override)
+    const char * chat_template_override)
 {
     // TODO: handle "chatml"?
-    auto chat_template = chat_template_override.empty()
-        ? llama_model_meta_val_str(model, "tokenizer.chat_template")
-        : chat_template_override;
+    std::string chat_template = chat_template_override
+        ? chat_template_override
+        : llama_model_meta_val_str(model, "tokenizer.chat_template");
     auto bos_token = _llama_token_to_piece(model, llama_token_bos(model), true);
     auto eos_token = _llama_token_to_piece(model, llama_token_eos(model), true);
     return llama_chat_template(chat_template, bos_token, eos_token);
@@ -69,9 +93,9 @@ std::string llama_chat_template::apply(
                 throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
             }
             std::string role = message.at("role");
-            std::string content = message.at("content");
 
-            if (!_supports_system_role) {
+            if (!message["content"].is_null() && !_supports_system_role) {
+                std::string content = message.at("content");
                 if (role == "system") {
                     if (!pending_system.empty()) pending_system += "\n";
                     pending_system += content;
@@ -89,8 +113,11 @@ std::string llama_chat_template::apply(
             }
             if (_requires_object_arguments && message.contains("tool_calls")) {
                 for (auto & tool_call : message.at("tool_calls")) {
-                    std::string arguments = tool_call.at("arguments");
-                    tool_call["arguments"] = json::parse(arguments);
+                    if (tool_call["type"] == "function") {
+                        auto & function = tool_call.at("function");
+                        std::string arguments = function.at("arguments");
+                        function["arguments"] = json::parse(arguments);
+                    }
                 }
             }
         }
@@ -99,20 +126,11 @@ std::string llama_chat_template::apply(
 
     auto context = minja::Context::make(json({
         {"messages", actual_messages},
+        {"tools", tools},
        {"add_generation_prompt", add_generation_prompt},
        {"bos_token", _bos_token},
        {"eos_token", _eos_token},
    }));
-    if (!tools.is_null() && !tools.empty()) {
-        auto tools_val = minja::Value(tools);
-        context->set("tools", tools_val);
-    }
-
-    auto tmpl_root = minja::Parser::parse(_chat_template, {
-        /* .trim_blocks = */ true,
-        /* .lstrip_blocks = */ true,
-        /* .keep_trailing_newline = */ false,
-    });
-    return tmpl_root->render(context);
+    return _template_root->render(context);
 }
diff --git a/common/chat-template.h b/common/chat-template.h
index 4bab3ff08a346..e4dc7667f42dc 100644
--- a/common/chat-template.h
+++ b/common/chat-template.h
@@ -1,11 +1,13 @@
 #pragma once
 
+#include "minja.hpp"
 #include <json.hpp>
 #include <string>
 #include <vector>
 
 using json = nlohmann::ordered_json;
 
+
 enum llama_tool_call_style {
     Unknown,
     Llama31,
@@ -27,30 +29,14 @@ class llama_chat_template {
     std::string _chat_template;
     std::string _bos_token;
     std::string _eos_token;
-  public:
-    llama_chat_template(const std::string & chat_template, const std::string & bos_token, const std::string & eos_token)
-        : _chat_template(chat_template), _bos_token(bos_token), _eos_token(eos_token) {
+    std::unique_ptr<minja::TemplateNode> _template_root;
-
-        _supports_tools = chat_template.find("tools") != std::string::npos;
-        _requires_object_arguments = chat_template.find("tool_call.arguments | items") != std::string::npos;
-        _supports_system_role = chat_template.find("System role not supported") == std::string::npos;
-
-        if (chat_template.find("<tool_call>") != std::string::npos) {
-            _tool_call_style = Hermes2Pro;
-        } else if (chat_template.find(">>>all") != std::string::npos) {
-            _tool_call_style = FunctionaryV3Llama3;
-        } else if (chat_template.find("<|start_header_id|>") != std::string::npos) {
-            if (chat_template.find("<|python_tag|>") != std::string::npos) {
-                _tool_call_style = Llama31;
-            }
-        }
-    }
+  public:
+    llama_chat_template(const std::string & chat_template, const std::string & bos_token, const std::string & eos_token);
 
     static llama_chat_template from_model(
         const struct llama_model * model,
-        const std::string & chat_template_override);
+        const char * chat_template_override = nullptr);
 
     llama_tool_call_style tool_call_style() const { return _tool_call_style; }
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 341d1cb45e589..65c0eab0d839b 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -662,9 +662,23 @@ struct server_context {
     bool validate_model_chat_template(bool use_jinja) const {
         llama_chat_message chat[] = {{"user", "test"}};
 
-        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+        if (use_jinja) {
+            auto chat_template = llama_chat_template::from_model(model);
+            try {
+                chat_template.apply({{
+                    {"role", "user"},
+                    {"content", "test"},
+                }}, json(), true);
+                return true;
+            } catch (const std::exception & e) {
+                SRV_ERR("failed to apply template: %s\n", e.what());
+                return false;
+            }
+        } else {
+            const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
 
-        return res > 0;
+            return res > 0;
+        }
     }
 
     void init() {
@@ -2860,7 +2874,7 @@ int main(int argc, char ** argv) {
             return;
         }
 
-        auto chat_template = llama_chat_template::from_model(ctx_server.model, params.chat_template.empty() ? nullptr : params.chat_template.c_str());
+        auto chat_template = llama_chat_template::from_model(ctx_server.model, params.chat_template.empty() ? nullptr : params.chat_template.c_str());
 
         json data;
         try {
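
For reviewers, a minimal usage sketch (not part of the patch) of what this refactor enables: the minja template is parsed once in the constructor and reused across `apply()` calls, and `from_model` now takes an optional `const char *` override. The `render_prompt` helper, the example messages, and the assumption of an already-loaded model are hypothetical; only `llama_chat_template::from_model(model, override)` and `apply(messages, tools, add_generation_prompt)` come from this diff.

```cpp
// Hypothetical caller code, assuming `model` was loaded elsewhere (e.g. by the server).
#include "chat-template.h"

#include <cstdio>
#include <string>

using json = nlohmann::ordered_json;

static std::string render_prompt(const struct llama_model * model) {
    // Tool-call style detection and minja parsing now happen once, in the constructor.
    auto tmpl = llama_chat_template::from_model(model, /* chat_template_override = */ nullptr);

    json messages = json::array({
        {{"role", "system"}, {"content", "You are a helpful assistant."}},
        {{"role", "user"},   {"content", "What is the weather in Paris?"}},
    });

    // Tools are injected directly into the template context ({"tools", tools});
    // a null json here simply means "no tools available".
    json tools = json();

    try {
        // Renders the pre-parsed template; only the context changes per call.
        return tmpl.apply(messages, tools, /* add_generation_prompt = */ true);
    } catch (const std::exception & e) {
        fprintf(stderr, "failed to apply template: %s\n", e.what());
        return "";
    }
}
```

This mirrors the new `use_jinja` branch of `validate_model_chat_template` in server.cpp, which applies a single test message and treats any exception as an invalid template.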