Move JSON grammar -> regex grammar conversion to the router (#2772)
* Move JSON grammar -> regex grammar conversion to the router

This change moves the JSON grammar -> regex grammar conversion to the
router by adding a dependency on the `outlines-core` Rust crate. In
contrast to the Python implementation, the conversions are not LRU-cached
since they seem to be fast enough:

simple schema           time:   [5.8293 µs 5.8307 µs 5.8320 µs]
                        change: [-13.166% -12.884% -12.641%] (p = 0.00 < 0.05)
                        Performance has improved.

complex schema          time:   [14.875 µs 14.881 µs 14.887 µs]
                        change: [-2.1637% -1.9914% -1.7852%] (p = 0.00 < 0.05)
                        Performance has improved.

Using the schemas from:
https://github.com/dottxt-ai/outlines-core/blob/main/benchmarks/bench_json_schema.py
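
For context on the numbers above: they are Criterion measurements of the schema-to-regex conversion. A minimal sketch of such a micro-benchmark is shown below; the schema literal and benchmark name are placeholders, and it assumes a crate that depends on `criterion`, `serde_json`, and the pinned `outlines-core` revision from `router/Cargo.toml`. It is not part of this commit.

```rust
// Sketch of a Criterion micro-benchmark for the JSON schema -> regex conversion.
// The schema and bench name below are placeholders; the numbers quoted above
// were measured with the schemas from the linked bench_json_schema.py.
use criterion::{criterion_group, criterion_main, Criterion};
use outlines_core::json_schema::to_regex;
use serde_json::json;

fn bench_simple_schema(c: &mut Criterion) {
    let schema = json!({
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "age": { "type": "integer" }
        },
        "required": ["name", "age"]
    });
    c.bench_function("simple schema", |b| {
        // Same three-argument call shape as in router/src/validation.rs below.
        b.iter(|| to_regex(&schema, None, &schema).unwrap())
    });
}

criterion_group!(benches, bench_simple_schema);
criterion_main!(benches);
```

Running `cargo bench` with a harness along these lines produces output in the format quoted above.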
danieldk authored Nov 25, 2024
1 parent c637d68 commit 289aa48
Showing 15 changed files with 108 additions and 64 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
hooks:
- id: check-yaml
- id: end-of-file-fixer
exclude: crate-hashes.json
- id: trailing-whitespace
exclude: docs/source/reference/launcher.md
- repo: https://github.com/psf/black
24 changes: 24 additions & 0 deletions Cargo.lock


3 changes: 3 additions & 0 deletions crate-hashes.json
@@ -0,0 +1,3 @@
{
"git+https://github.com/dottxt-ai/outlines-core.git?rev=ba10c619fc9bf3c487e43f49bdecb95a24bb465c#[email protected]": "1j9dcd831b0bmmjk2n4aag3x47qnqmkpg4gqpvwwyic7744llbfm"
}
@@ -1,23 +1,23 @@
{
"choices": [
{
"finish_reason": "eos_token",
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{ \"temperature\": [ 26, 30, 33, 29 ] ,\"unit\": \"Fahrenheit\" }",
"content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
"role": "assistant"
}
}
],
"created": 1718044128,
"created": 1732525803,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.5-dev0-native",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 39,
"completion_tokens": 29,
"prompt_tokens": 136,
"total_tokens": 175
"total_tokens": 165
}
}
@@ -1,7 +1,7 @@
{
"choices": [
{
"finish_reason": "eos_token",
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
@@ -13,27 +13,27 @@
"function": {
"arguments": {
"format": "celsius",
"location": "Brooklyn"
"location": "Brooklyn, New York"
},
"description": null,
"name": "get_current_weather"
},
"id": 0,
"id": "0",
"type": "function"
}
]
},
"usage": null
}
],
"created": 1712782670,
"created": 1732293383,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.1-native",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 37,
"prompt_tokens": 524,
"total_tokens": 561
"completion_tokens": 30,
"prompt_tokens": 615,
"total_tokens": 645
}
}
@@ -1,7 +1,7 @@
{
"choices": [
{
"finish_reason": "eos_token",
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
@@ -13,27 +13,27 @@
"function": {
"arguments": {
"format": "celsius",
"location": "Brooklyn"
"location": "Brooklyn, New York"
},
"description": null,
"name": "get_current_weather"
},
"id": 0,
"id": "0",
"type": "function"
}
]
},
"usage": null
}
],
"created": 1712787937,
"created": 1732293384,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.1-native",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 37,
"prompt_tokens": 524,
"total_tokens": 561
"completion_tokens": 30,
"prompt_tokens": 615,
"total_tokens": 645
}
}
@@ -18,10 +18,10 @@
"logprobs": null
}
],
"created": 1729084854,
"created": 1732293254,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "2.3.2-dev0-native",
"system_fingerprint": "2.4.1-dev0-native",
"usage": null
}
@@ -19,10 +19,10 @@
"logprobs": null
}
],
"created": 1729084850,
"created": 1732293246,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "2.3.2-dev0-native",
"system_fingerprint": "2.4.1-dev0-native",
"usage": null
}
@@ -6,22 +6,23 @@
"role": "assistant",
"tool_calls": {
"function": {
"arguments": "</s>",
"arguments": "<|eot_id|>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
},
"finish_reason": "eos_token",
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1712788218,
"created": 1732293235,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.1-native"
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native",
"usage": null
}
@@ -55,7 +55,7 @@ class Weather(BaseModel):
called = chat_completion["choices"][0]["message"]["content"]

assert response.status_code == 200
assert called == '{ "temperature": [ 26, 30, 33, 29 ] ,"unit": "Fahrenheit" }'
assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
assert chat_completion == response_snapshot


10 changes: 5 additions & 5 deletions integration-tests/models/test_tools_llama.py
@@ -101,7 +101,7 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
"function": {
"description": None,
"name": "get_current_weather",
"arguments": {"format": "celsius", "location": "Brooklyn, NY"},
"arguments": {"format": "celsius", "location": "Brooklyn, New York"},
},
}
]
@@ -138,7 +138,7 @@ async def test_flash_llama_grammar_tools_auto(
"function": {
"description": None,
"name": "get_current_weather",
"arguments": {"format": "celsius", "location": "Brooklyn, NY"},
"arguments": {"format": "celsius", "location": "Brooklyn, New York"},
},
}
]
@@ -219,7 +219,7 @@ async def test_flash_llama_grammar_tools_stream(

assert (
tool_calls_generated
== '{"function": {"_name": "get_current_weather", "format": "celsius", "location": "Paris, France"}}<|eot_id|>'
== '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>'
)
assert count == 28
assert last_response == response_snapshot
@@ -366,7 +366,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
assert count == 29
assert (
tool_calls_generated
== '{"function": {"_name": "get_current_weather", "format": "celsius", "location": "San Francisco, CA"}}<|eot_id|>'
== '{"function": {"_name": "get_current_weather", "location": "San Francisco, CA", "format": "celsius"}}<|eot_id|>'
)
assert last_response == response_snapshot

@@ -465,6 +465,6 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
assert count == 39
assert (
tool_calls_generated
== '{"function": {"_name": "get_n_day_weather_forecast", "format": "celsius", "location": "San Francisco, CA", "num_days":3}}<|eot_id|>'
== '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days":3}}<|eot_id|>'
)
assert last_response == response_snapshot
2 changes: 2 additions & 0 deletions router/Cargo.toml
@@ -8,6 +8,7 @@ authors.workspace = true
homepage.workspace = true

[dependencies]
anyhow = "1"
async-trait = "0.1.74"
async-stream = "0.3.5"
axum = { version = "0.7", features = ["json"] }
@@ -22,6 +23,7 @@ metrics-exporter-prometheus = { workspace = true }
nohash-hasher = "0.2.0"
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
opentelemetry-otlp = "0.13.0"
outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
rand = "0.8.5"
reqwest = { version = "0.11.20", features = [] }
serde = "1.0.188"
2 changes: 1 addition & 1 deletion router/src/infer/chat_template.rs
@@ -804,7 +804,7 @@ mod tests {
let tool_prompt = "This default prompt will be used".to_string();
let tools_and_prompt = Some((tools, tool_prompt));
let result = ct.apply(msgs, tools_and_prompt);
let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"properties\":{\"format\":{\"description\":\"The temperature unit to use. Infer this from the users location.\",\"enum\":[\"celsius\",\"fahrenheit\"],\"type\":\"string\"},\"location\":{\"description\":\"The city and state, e.g. San Francisco, CA\",\"type\":\"string\"}},\"required\":[\"location\",\"format\"],\"type\":\"object\"}}}]\nThis default prompt will be used [/INST]".to_string();
let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"type\":\"object\",\"properties\":{\"location\":{\"type\":\"string\",\"description\":\"The city and state, e.g. San Francisco, CA\"},\"format\":{\"type\":\"string\",\"enum\":[\"celsius\",\"fahrenheit\"],\"description\":\"The temperature unit to use. Infer this from the users location.\"}},\"required\":[\"location\",\"format\"]}}}]\nThis default prompt will be used [/INST]".to_string();
assert_eq!(result.unwrap(), expected);
}

15 changes: 10 additions & 5 deletions router/src/validation.rs
@@ -9,6 +9,7 @@ use crate::{PyTokenizer, Tokenizer};
use base64::{engine::general_purpose::STANDARD, Engine};
use image::{ImageFormat, ImageReader};
use jsonschema::{Draft, JSONSchema};
use outlines_core::json_schema::to_regex as json_schema_to_regex;
use rand::{thread_rng, Rng};
use serde_json::Value;
use std::io::Cursor;
@@ -351,11 +352,13 @@ impl Validation {
"Grammar must have a 'properties' field".to_string(),
))?;

// Serialize json to string
ValidGrammar::Json(
serde_json::to_string(&json)
.map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?,
)
// Do compilation in the router for performance. In the future, we
// should also move regex -> automaton compilation in the router,
// but this is not yet supported in pure Rust by outlines-core.
let grammar_regex = json_schema_to_regex(&json, None, &json)
.map_err(ValidationError::RegexFromSchema)?;

ValidGrammar::Regex(grammar_regex.to_string())
}
GrammarType::Regex(regex) => ValidGrammar::Regex(regex),
};
@@ -810,6 +813,8 @@ pub enum ValidationError {
Grammar,
#[error("grammar is not valid: {0}")]
InvalidGrammar(String),
#[error("cannot compile regex from schema: {0}")]
RegexFromSchema(anyhow::Error),
#[error("base64 encoding is invalid: {0}")]
InvalidBase64(#[from] base64::DecodeError),
#[error("invalid image: {0}")]
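
As a standalone illustration of the new code path in `router/src/validation.rs`, here is a rough sketch of the same conversion call outside the router. The schema is an invented example, and the sketch assumes `anyhow`, `serde_json`, and the pinned `outlines-core` revision as dependencies; it is not part of this commit.

```rust
// Minimal sketch: convert a JSON schema to a regular expression with the
// pinned outlines-core revision, mirroring the call added in
// router/src/validation.rs. The schema here is an illustrative placeholder.
use anyhow::Result;
use outlines_core::json_schema::to_regex as json_schema_to_regex;
use serde_json::json;

fn main() -> Result<()> {
    let schema = json!({
        "type": "object",
        "properties": {
            "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] },
            "temperature": { "type": "array", "items": { "type": "number" } }
        },
        "required": ["unit", "temperature"]
    });

    // Same three-argument call shape as the router uses; the error is an
    // anyhow::Error, which is what ValidationError::RegexFromSchema wraps.
    let regex = json_schema_to_regex(&schema, None, &schema)?;
    println!("{regex}");

    Ok(())
}
```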