From f259993c2464967cc500f827069289550c7ca3d5 Mon Sep 17 00:00:00 2001
From: Yingbei <yingbei@acorn.io>
Date: Wed, 3 Jul 2024 17:41:53 -0700
Subject: [PATCH 1/3] test results for functionary small & medium

---
 docs/docs/benchmark.mdx               |  2 ++
 docs/src/components/BenchmarkTable.js | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/docs/docs/benchmark.mdx b/docs/docs/benchmark.mdx
index 95016d8..e366193 100644
--- a/docs/docs/benchmark.mdx
+++ b/docs/docs/benchmark.mdx
@@ -30,6 +30,8 @@ Some of the LLMs above require using custom libraries to post-process LLM genera
 
 `Nexusflow/NexusRaven-V2-13B` required [nexusraven-pip](https://github.com/nexusflowai/nexusraven-pip).
 
+`functionary-small-v2.5` and `functionary-medium-v3.0` were tested using [MeetKai's functionary](https://github.com/MeetKai/functionary?tab=readme-ov-file#setup) with the vLLM framework. For each model, we ran the benchmark with functionary's `Grammar Sampling` feature both enabled and disabled and report the higher of the two scores. `functionary-small-v2.5` scored higher than `functionary-medium-v3.0`, primarily because the medium model hallucinated more often in some of our more advanced test cases.
+
 :::::
 
 ∔ `Nexusflow/NexusRaven-V2-13B` and `gorilla-llm/gorilla-openfunctions-v2` don't accept tool observations, the result of running a tool or function once the LLM calls it, so we appended the observation to the prompt.
\ No newline at end of file
diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js
index 94f54ae..182de7d 100644
--- a/docs/src/components/BenchmarkTable.js
+++ b/docs/src/components/BenchmarkTable.js
@@ -200,6 +200,24 @@ const data = [
         gsm8k: '-',
         math: '-',
         mtBench:'-',
+    },
+    {
+        model: 'functionary-medium-v3.0',
+        functionCalling: '46.43%',
+        mmlu: '-',
+        gpqa: '-',
+        gsm8k: '-',
+        math: '-',
+        mtBench:'-',
+    },
+    {
+        model: 'functionary-small-v2.5',
+        functionCalling: '57.14%',
+        mmlu: '-',
+        gpqa: '-',
+        gsm8k: '-',
+        math: '-',
+        mtBench:'-',
     }
 ];
 

From 1c323d4bc2cbd371b13dde8b58366d8f5cbb359b Mon Sep 17 00:00:00 2001
From: Yingbei <yingbei@acorn.io>
Date: Wed, 3 Jul 2024 17:45:12 -0700
Subject: [PATCH 2/3] update param size of functionary models

---
 docs/src/components/BenchmarkTable.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js
index 182de7d..f83f019 100644
--- a/docs/src/components/BenchmarkTable.js
+++ b/docs/src/components/BenchmarkTable.js
@@ -203,6 +203,7 @@ const data = [
     },
     {
         model: 'functionary-medium-v3.0',
+        params: 70.6,
         functionCalling: '46.43%',
         mmlu: '-',
         gpqa: '-',
@@ -212,6 +213,7 @@ const data = [
     },
     {
         model: 'functionary-small-v2.5',
+        params: 8.03,
         functionCalling: '57.14%',
         mmlu: '-',
         gpqa: '-',

From be4ed860352121f8477e0dddff1b742c63b51e05 Mon Sep 17 00:00:00 2001
From: sanjay920 <sanjay.nadhavajhala@gmail.com>
Date: Thu, 4 Jul 2024 17:27:55 -0700
Subject: [PATCH 3/3] add functionary models' benchmark results

---
 docs/src/components/BenchmarkTable.js | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js
index f83f019..f2ee978 100644
--- a/docs/src/components/BenchmarkTable.js
+++ b/docs/src/components/BenchmarkTable.js
@@ -205,21 +205,21 @@ const data = [
         model: 'functionary-medium-v3.0',
         params: 70.6,
         functionCalling: '46.43%',
-        mmlu: '-',
-        gpqa: '-',
-        gsm8k: '-',
-        math: '-',
-        mtBench:'-',
+        mmlu: '79.85',
+        gpqa: '38.39',
+        gsm8k: '89.54',
+        math: '43.02',
+        mtBench:'5.49',
     },
     {
         model: 'functionary-small-v2.5',
         params: 8.03,
         functionCalling: '57.14%',
-        mmlu: '-',
-        gpqa: '-',
-        gsm8k: '-',
-        math: '-',
-        mtBench:'-',
+        mmlu: '63.92',
+        gpqa: '32.14',
+        gsm8k: '66.11',
+        math: '20.54',
+        mtBench:'7.09',
     }
 ];