From 5e3c586dff9c2b675aa909abab8a03b78f5d9deb Mon Sep 17 00:00:00 2001 From: Yingbei Date: Tue, 23 Jul 2024 18:04:07 -0700 Subject: [PATCH 1/2] Function call benchmark for llama3.1 --- docs/docs/benchmark.mdx | 2 ++ docs/src/components/BenchmarkTable.js | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/docs/docs/benchmark.mdx b/docs/docs/benchmark.mdx index 6c92fdc..932bdcf 100644 --- a/docs/docs/benchmark.mdx +++ b/docs/docs/benchmark.mdx @@ -34,6 +34,8 @@ Some of the LLMs above require using custom libraries to post-process LLM genera `groq/Llama-3-Groq-8B-Tool-Use` and `groq/Llama-3-Groq-70B-Tool-Use` are tested using [groq's API](https://console.groq.com/docs/tool-use). +`Meta/Llama-3.1-8B-Instruct` is tested using Meta's [Llama3.1 official docs](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#user-defined-custom-tool-calling) of User-defined Custom tool calling. + ::::: ∔ `Nexusflow/NexusRaven-V2-13B` and `gorilla-llm/gorilla-openfunctions-v2` don't accept tool observations, the result of running a tool or function once the LLM calls it, so we appended the observation to the prompt. 
\ No newline at end of file diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js index 8d4c479..9e0bda3 100644 --- a/docs/src/components/BenchmarkTable.js +++ b/docs/src/components/BenchmarkTable.js @@ -249,6 +249,16 @@ const data = [ gsm8k: '-', math: '-', mtBench:'-', + }, + { + model: 'Meta/Llama-3.1-8B-Instruct', + params: 8.03, + functionCalling: '32.50%', + mmlu: '-', + gpqa: '-', + gsm8k: '-', + math: '-', + mtBench:'-', } ]; From c689c4e48d8d8be5c19642ff351019c93e941dfa Mon Sep 17 00:00:00 2001 From: Yingbei Date: Wed, 24 Jul 2024 15:55:01 -0700 Subject: [PATCH 2/2] update results of llama3.1 - 70b --- docs/docs/benchmark.mdx | 2 +- docs/src/components/BenchmarkTable.js | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/docs/benchmark.mdx b/docs/docs/benchmark.mdx index 932bdcf..a89e027 100644 --- a/docs/docs/benchmark.mdx +++ b/docs/docs/benchmark.mdx @@ -34,7 +34,7 @@ Some of the LLMs above require using custom libraries to post-process LLM genera `groq/Llama-3-Groq-8B-Tool-Use` and `groq/Llama-3-Groq-70B-Tool-Use` are tested using [groq's API](https://console.groq.com/docs/tool-use). -`Meta/Llama-3.1-8B-Instruct` is tested using Meta's [Llama3.1 official docs](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#user-defined-custom-tool-calling) of User-defined Custom tool calling. +`Meta/Llama-3.1-8B-Instruct` and `Meta/Llama-3.1-70B-Instruct` are tested using Meta's [Llama3.1 official docs](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#user-defined-custom-tool-calling) of User-defined Custom tool calling. 
::::: diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js index 9e0bda3..bcd988d 100644 --- a/docs/src/components/BenchmarkTable.js +++ b/docs/src/components/BenchmarkTable.js @@ -259,6 +259,16 @@ const data = [ gsm8k: '-', math: '-', mtBench:'-', + }, + { + model: 'Meta/Llama-3.1-70B-Instruct', + params: 70.6, + functionCalling: '63.75%', + mmlu: '-', + gpqa: '-', + gsm8k: '-', + math: '-', + mtBench:'-', } ];