Skip to content

Commit

Permalink
updated grammar
Browse files Browse the repository at this point in the history
  • Loading branch information
mike dupont committed Feb 19, 2024
1 parent fa12415 commit 5980f9c
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 13 deletions.
7 changes: 5 additions & 2 deletions bin/simple_grammar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,12 @@ let aux dir suffix prompt1 client1 param_record =

(** [process_prompt client1 param_record path model prompt1 suffix repeat]
    prints one header line naming [model], [path] and [prompt1], then calls
    [aux] once for every [i] from [1] to [repeat].

    For iteration [i] the arguments handed to [aux] are:
    - the path [path ^ "_" ^ string_of_int i],
    - the suffix [suffix ^ string_of_int i],
    - the prompt [prompt1] extended with
      [". Generate variant number <i>. Your response:"],
    followed by [client1] and [param_record] unchanged.

    The value returned by [aux] is discarded. When [repeat < 1] only the
    header line is printed. *)
let process_prompt: backend -> 'client_t2 -> string -> string -> string -> string -> int ->unit =
  fun client1 param_record path model prompt1 suffix repeat ->
    (* Log the header exactly once; the earlier variant of this line (without
       the colon after "prompt") was a leftover and produced a duplicate. *)
    print_endline ("Consider model: " ^ model ^ " path: "^ path ^ " prompt:" ^ prompt1);
    for i = 1 to repeat do
      let index = string_of_int i in
      let newprompt = prompt1 ^ ". Generate variant number " ^ index ^ ". Your response:" in
      let newpath = path ^ "_" ^ index in
      let newsuffix = suffix ^ index in
      (* A single [aux] call per iteration: the former extra call used the
         same path and suffix with the undecorated prompt. *)
      let _ = aux newpath newsuffix newprompt client1 param_record in
      ()
    done

Expand Down
76 changes: 65 additions & 11 deletions grammars/ebnf.ebnf
Original file line number Diff line number Diff line change
@@ -1,17 +1,71 @@
# GBNF (GGML BNF) is a format for defining formal grammars to constrain model outputs in llama.cpp.
# Backus-Naur Form (BNF) is a notation for describing the syntax of formal languages like programming languages, file formats, and protocols. GBNF is an extension of BNF that primarily adds a few modern regex-like features.
# In GBNF, we define production rules that specify how a non-terminal (rule name) can be replaced with sequences of terminals (characters, specifically Unicode code points) and other non-terminals. The basic format of a production rule is nonterminal ::= sequence....

rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S | comment
root ::= ( S rule S ) *

# Terminals support the full range of Unicode. Unicode characters can be specified directly in the grammar, for example hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit (\UXXXXXXXX).
range ::= "-"
factor_range ::= term S range S term

# Character ranges can be negated with ^:
negate ::= "^"

#Sequences and Alternatives
#The order of symbols in a sequence matters. For example, in "1. " move " " move "\n", the "1. " must come before the first move, etc.
concatenation ::= ( S factor S ? ) +

# Alternatives, denoted by |, give different sequences that are acceptable.
# The "|" separator token gets its own rule name so that it does not share a name with the sequence rule below.
alternation_symbol ::= "|"
# One or more concatenations, each optionally followed by the "|" separator.
alternation ::= ( S concatenation S alternation_symbol ? ) +

# Parentheses () can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence.
parens_open ::= "("
parens_close ::= ")"
parens ::= parens_open | parens_close

#Repetition and Optional Symbols
occurance ::= repetition_plus | repetition_star | repetition_optional

#* after a symbol or sequence means that it can be repeated zero or more times.
repetition_star ::= "*"

#+ denotes that the symbol or sequence should appear one or more times.
repetition_plus ::= "+"

#? makes the preceding symbol or sequence optional.
repetition_optional ::= "?"


#Comments and newlines
#Comments can be specified with #:
comment ::= "#" [a-zA-Z0-9 \t]*

# Newlines are allowed between rules and between symbols or sequences nested inside parentheses. Additionally, a newline after an alternate marker | will continue the current rule, even outside of parentheses.


letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
braces_open ::= "["
braces_close ::= "]"
braces_symbol ::= braces_open | braces_close

quote ::= "\""
assignment ::= "::="

# Punctuation tokens of the grammar notation. Each name on the right is spelled as the rule is defined in this file (`quote`, `occurance`).
symbol ::= braces_symbol | parens | quote | assignment | alternation | range | occurance | negate

character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) *
terminal ::= quote character+ quote
group_term ::= parens_open S rhs S parens_close
range_term ::= braces_open S rhs S braces_close
term ::= group_term |range_term | terminal | identifier

# A term followed by a repetition marker ("+", "*" or "?"); `occurance` is the rule name as defined in this file.
factor_occurence ::= term S occurance
factor_negate ::= negate S factor
factor ::= factor_occurence |factor_range | term S

0 comments on commit 5980f9c

Please sign in to comment.