-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
mike dupont
committed
Feb 19, 2024
1 parent
fa12415
commit 5980f9c
Showing
2 changed files
with
70 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,71 @@ | ||
# GBNF (GGML BNF) is a format for defining formal grammars to constrain model outputs in llama.cpp. | ||
# Backus-Naur Form (BNF) is a notation for describing the syntax of formal languages like programming languages, file formats, and protocols. GBNF is an extension of BNF that primarily adds a few modern regex-like features. | ||
# In GBNF, we define production rules that specify how a non-terminal (rule name) can be replaced with sequences of terminals (characters, specifically Unicode code points) and other non-terminals. The basic format of a production rule is nonterminal ::= sequence.... | ||
|
||
rhs ::= alternation | ||
lhs ::= identifier | ||
rule ::= lhs S "=" S rhs S | comment | ||
root ::= ( S rule S ) * | ||
|
||
# Terminals support the full range of Unicode. Unicode characters can be specified directly in the grammar, for example hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit (\UXXXXXXXX). | ||
range ::= "-" | ||
factor_range ::= term S range S term | ||
|
||
# Character ranges can be negated with ^: | ||
negate ::= "^" | ||
|
||
#Sequences and Alternatives | ||
#The order of symbols in a sequence matters. For example, in "1. " move " " move "\n", the "1. " must come before the first move, etc. | ||
concatenation ::= ( S factor S ? ) + | ||
|
||
# Alternatives, denoted by |, give different sequences that are acceptable. | ||
alternation ::= "|" | ||
alternation ::= ( S concatenation S alternation ? ) + | ||
|
||
# Parentheses () can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence. | ||
parens_open ::= "(" | ||
parens_close ::= ")" | ||
parens ::= parens_open | parens_close | ||
|
||
#Repetition and Optional Symbols | ||
occurance ::= repetition_plus | repetition_star | repetition_optional | ||
|
||
#* after a symbol or sequence means that it can be repeated zero or more times. | ||
repetition_star ::= "*" | ||
|
||
#+ denotes that the symbol or sequence should appear one or more times. | ||
repetition_plus ::= "+" | ||
|
||
#? makes the preceding symbol or sequence optional. | ||
repetition_optional ::= "?" | ||
|
||
|
||
#Comments and newlines | ||
#Comments can be specified with #: | ||
# A comment runs from "#" to the end of the line, so its body may contain any character except a line break (punctuation included). | ||
comment ::= "#" [^\r\n]* | ||
|
||
# Newlines are allowed between rules and between symbols or sequences nested inside parentheses. Additionally, a newline after an alternate marker | will continue the current rule, even outside of parentheses. | ||
|
||
|
||
letter ::= [a-zA-Z] | ||
digit ::= [0-9] | ||
S ::= ( " " | "\n" | "\t" | "\r" ) | ||
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?" | ||
braces_open ::= "[" | ||
braces_close ::= "]" | ||
braces_symbol ::= braces_open | braces_close | ||
|
||
quote ::= "\"" | ||
assignment ::= "::=" | ||
|
||
# Punctuation tokens of the grammar notation. Every alternative names a rule defined in this file (quote and occurance are spelled as at their definitions). | ||
symbol ::= braces_symbol | parens | quote | assignment | alternation | range | occurance | negate | ||
|
||
character ::= letter | digit | symbol | "_" | " " | ||
identifier ::= letter ( letter | digit | "_" )* | ||
terminal ::= "'" character "'" ( character "'" ) "'" | ||
terminator ::= (";" | ".") | ||
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier | ||
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S | ||
concatenation ::= ( S factor S "," ? ) + | ||
alternation ::= ( S concatenation S "|" ? ) + | ||
rhs ::= alternation | ||
lhs ::= identifier | ||
rule ::= lhs S "=" S rhs S terminator comment* | ||
root ::= comment* ( S rule S ) * | ||
terminal ::= quote character+ quote | ||
group_term ::= parens_open S rhs S parens_close | ||
range_term ::= braces_open S rhs S braces_close | ||
term ::= group_term |range_term | terminal | identifier | ||
|
||
# A term followed by a repetition/optional operator ("+", "*" or "?"); the operator rule is spelled "occurance" where it is defined. | ||
factor_occurence ::= term S occurance | ||
factor_negate ::= negate S factor | ||
factor ::= factor_occurence |factor_range | term S |