From 4bd82a85dfcb720ab06005e2e4b041981693dfb3 Mon Sep 17 00:00:00 2001 From: Whebon Date: Thu, 18 Jan 2024 10:32:51 +0100 Subject: [PATCH 01/27] Add a function to merge grammars --- src/HerbGrammar.jl | 1 + src/csg/csg.jl | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/HerbGrammar.jl b/src/HerbGrammar.jl index dd0bd3a..20dce73 100644 --- a/src/HerbGrammar.jl +++ b/src/HerbGrammar.jl @@ -59,6 +59,7 @@ export cfg2csg, clearconstraints!, addconstraint!, + merge_grammars!, @pcfgrammar, expr2pcfgrammar, diff --git a/src/csg/csg.jl b/src/csg/csg.jl index 4e23afe..18081c9 100644 --- a/src/csg/csg.jl +++ b/src/csg/csg.jl @@ -131,4 +131,19 @@ clearconstraints!(grammar::ContextSensitiveGrammar) = empty!(grammar.constraints function Base.display(rulenode::RuleNode, grammar::ContextSensitiveGrammar) return rulenode2expr(rulenode, grammar) -end \ No newline at end of file +end + +""" + merge_grammars!(merge_to::Grammar, merge_from::Grammar) + +Adds all rules and constraints from `merge_from` to `merge_to`. 
+""" +function merge_grammars!(merge_to::Grammar, merge_from::Grammar) + for i in eachindex(merge_from.rules) + expression = :($(merge_from.types[i]) = $(merge_from.rules[i])) + add_rule!(merge_to, expression) + end + for i in eachindex(merge_from.constraints) + addconstraint!(merge_to, merge_from.constraints[i]) + end +end From e9b6d9f420447e67c691c2b3bf212eed9372fe4d Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Thu, 18 Jan 2024 12:18:36 +0100 Subject: [PATCH 02/27] Add test for merging grammars --- test/test_cfg.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/test_cfg.jl b/test/test_cfg.jl index 4b069b1..b2552d0 100644 --- a/test/test_cfg.jl +++ b/test/test_cfg.jl @@ -1,5 +1,5 @@ @testset verbose=true "CFGs" begin - @testset "creating grammars" begin + @testset "Creating grammars" begin g₁ = @cfgrammar begin Real = |(1:9) end @@ -18,7 +18,7 @@ end - @testset "adding rules to grammar" begin + @testset "Adding rules to grammar" begin g₁ = @cfgrammar begin Real = |(1:2) end @@ -53,6 +53,22 @@ end + @testset "Merging two grammars" begin + g₁ = @csgrammar begin + Number = |(1:2) + Number = x + end + + g₂ = @csgrammar begin + Real = Real + Real + Real = Real * Real + end + + merge_grammars!(g₁, g₂) + + @test length(g₁.rules) == 5 + @test :Real ∈ g₁.types + end @testset "Writing and loading CFG to/from disk" begin g₁ = @cfgrammar begin From ce84c2af2a30a389f1eecbab4fcea3c3329da26a Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 18 Jan 2024 14:48:02 +0100 Subject: [PATCH 03/27] Generalize utility function for checking symbols This refactor allows for the reuse of the logic that checks whether a symbol is defined in a specific `Module` or `Main`/`Base`. 
--- src/utils.jl | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 76a937d..e1f7ca6 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -78,20 +78,24 @@ function _add_to_symboltable!(tab::SymbolTable, rule::Expr, mod::Module) return true end +function _apply_if_defined_in_modules(func::Function, s::Symbol, mods::Vector{Module}) + for mod in mods + if isdefined(mod, s) + func(mod, s) + return true + end + end + return false +end + +function _is_defined_in_modules(s::Symbol, mods::Vector{Module}) + _apply_if_defined_in_modules((mod, s) -> nothing, s, mods) +end function _add_to_symboltable!(tab::SymbolTable, s::Symbol, mod::Module) - if isdefined(mod, s) - tab[s] = getfield(mod, s) - return true - elseif isdefined(Base, s) - tab[s] = getfield(Base, s) - return true - elseif isdefined(Main, s) - tab[s] = getfield(Main, s) - return true - else - return false - end + _add_to_table! = (mod, s) -> tab[s] = getfield(mod, s) + + return _apply_if_defined_in_modules(_add_to_table!, s, [mod, Base, Main]) end From bcfec29f72d0176a06167d19adc601318a9ecdf8 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 18 Jan 2024 14:52:29 +0100 Subject: [PATCH 04/27] Allow for module specification in `isvariable(...)` Allow the specification of one or more `Module`s when checking if a rule used by a `RuleNode` represents a variable, or if it is defined in one of the specified `Module`s, `Main`, or `Base`. Fixes #21. 
--- src/rulenode_operators.jl | 42 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/rulenode_operators.jl b/src/rulenode_operators.jl index 5c8cc14..9b018c6 100644 --- a/src/rulenode_operators.jl +++ b/src/rulenode_operators.jl @@ -293,12 +293,48 @@ nchildren(grammar::Grammar, node::RuleNode)::Int = length(child_types(grammar, n """ isvariable(grammar::Grammar, node::RuleNode)::Bool -Returns true if the rule used by `node` represents a variable. +Return true if the rule used by `node` represents a variable. """ -isvariable(grammar::Grammar, node::RuleNode)::Bool = grammar.isterminal[node.ind] && grammar.rules[node.ind] isa Symbol +isvariable(grammar::Grammar, node::RuleNode)::Bool = ( + grammar.isterminal[node.ind] && + grammar.rules[node.ind] isa Symbol && + !_is_defined_in_modules(grammar.rules[node.ind], [Main, Base]) +) +""" + isvariable(grammar::Grammar, node::RuleNode, mod::Module)::Bool + +Return true if the rule used by `node` represents a variable. + +Taking into account the symbols defined in the given module(s). +""" +isvariable(grammar::Grammar, node::RuleNode, mod::Module...)::Bool = ( + grammar.isterminal[node.ind] && + grammar.rules[node.ind] isa Symbol && + !_is_defined_in_modules(grammar.rules[node.ind], [mod..., Main, Base]) +) + +""" + isvariable(grammar::Grammar, ind::Int)::Bool -isvariable(grammar::Grammar, ind::Int)::Bool = grammar.isterminal[ind] && grammar.rules[ind] isa Symbol +Return true if the rule with index `ind` represents a variable. +""" +isvariable(grammar::Grammar, ind::Int)::Bool = ( + grammar.isterminal[ind] && + grammar.rules[ind] isa Symbol && + !_is_defined_in_modules(grammar.rules[ind], [Main, Base]) +) +""" + isvariable(grammar::Grammar, ind::Int, mod::Module)::Bool +Return true if the rule with index `ind` represents a variable. + +Taking into account the symbols defined in the given module(s). 
+""" +isvariable(grammar::Grammar, ind::Int, mod::Module...)::Bool = ( + grammar.isterminal[ind] && + grammar.rules[ind] isa Symbol && + !_is_defined_in_modules(grammar.rules[ind], [mod..., Main, Base]) +) """ contains_returntype(node::RuleNode, grammar::Grammar, sym::Symbol, maxdepth::Int=typemax(Int)) From 78bf6dfe769885da75c80aca7c36d9d1794bb914 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 18 Jan 2024 14:55:57 +0100 Subject: [PATCH 05/27] Add tests for `isvariable` with specified module(s) --- test/runtests.jl | 1 + test/test_rulenode_operators.jl | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 test/test_rulenode_operators.jl diff --git a/test/runtests.jl b/test/runtests.jl index b64a4c3..60de23b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,4 +3,5 @@ using Test @testset "HerbGrammar.jl" verbose=true begin include("test_cfg.jl") + include("test_rulenode_operators.jl") end diff --git a/test/test_rulenode_operators.jl b/test/test_rulenode_operators.jl new file mode 100644 index 0000000..dee0c14 --- /dev/null +++ b/test/test_rulenode_operators.jl @@ -0,0 +1,18 @@ +module SomeDefinitions + a_variable_that_is_defined = 7 +end + +@testset verbose = true "RuleNode Operators" begin + @testset "Check if a symbol is a variable" begin + g₁ = @cfgrammar begin + Real = |(1:5) + Real = a_variable + Real = a_variable_that_is_defined + end + + @test !isvariable(g₁, RuleNode(5, g₁), SomeDefinitions) + @test isvariable(g₁, RuleNode(6, g₁), SomeDefinitions) + @test !isvariable(g₁, RuleNode(7, g₁), SomeDefinitions) + @test isvariable(g₁, RuleNode(7, g₁)) + end +end From 57e7be7e3e7460eb1b294caafb9de6555eee91a9 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:19:18 +0100 Subject: [PATCH 06/27] Add test to show `add_rule!` failing quietly --- test/test_cfg.jl | 1 + 1 file changed, 1 insertion(+) diff --git 
a/test/test_cfg.jl b/test/test_cfg.jl index 4b069b1..84c8482 100644 --- a/test/test_cfg.jl +++ b/test/test_cfg.jl @@ -51,6 +51,7 @@ @test g₂.rules[g₂.bytype[:Bool][1]] == :(Real ≤ Real) @test g₂.childtypes[g₂.bytype[:Bool][1]] == [:Real, :Real] + @test_throws ArgumentError add_rule!(g₂, :(Real != Bool)) end From a017396fd6ef5424ca76e8555ba3332896f368c2 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:29:51 +0100 Subject: [PATCH 07/27] Update `add_rule!` to throw an error If the expression passed for a rule is malformed, the function now throws an `ArgumentError`. Fixes #37. --- src/grammar_base.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/grammar_base.jl b/src/grammar_base.jl index d7be8f9..9d1cef1 100644 --- a/src/grammar_base.jl +++ b/src/grammar_base.jl @@ -205,7 +205,7 @@ The syntax is identical to the syntax of [`@csgrammar`](@ref) and [`@cfgrammar`] Calls to this function are ignored if a rule is already in the grammar. """ function add_rule!(g::Grammar, e::Expr) - if e.head == :(=) + if e.head == :(=) && typeof(e.args[1]) == Symbol s = e.args[1] # Name of return type rule = e.args[2] # expression? rvec = Any[] @@ -219,6 +219,8 @@ function add_rule!(g::Grammar, e::Expr) push!(g.types, s) g.bytype[s] = push!(get(g.bytype, s, Int[]), length(g.rules)) end + else + throw(ArgumentError("Invalid rule: $e. 
Rules must be of the form `Symbol = Expr`")) end alltypes = collect(keys(g.bytype)) From 08f7973a6c1fe3ee6eea78edb546234ba9efe5c1 Mon Sep 17 00:00:00 2001 From: Sebastijan Dumancic Date: Thu, 18 Jan 2024 15:47:14 +0100 Subject: [PATCH 08/27] small clarification --- src/rulenode_operators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rulenode_operators.jl b/src/rulenode_operators.jl index 9b018c6..b5ef545 100644 --- a/src/rulenode_operators.jl +++ b/src/rulenode_operators.jl @@ -293,7 +293,7 @@ nchildren(grammar::Grammar, node::RuleNode)::Int = length(child_types(grammar, n """ isvariable(grammar::Grammar, node::RuleNode)::Bool -Return true if the rule used by `node` represents a variable. +Return true if the rule used by `node` represents a variable in a program (essentially, an input to the program) """ isvariable(grammar::Grammar, node::RuleNode)::Bool = ( grammar.isterminal[node.ind] && From 8d937579737d1da707f08830da1dcaf1f6051ef1 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Fri, 19 Jan 2024 10:33:38 +0100 Subject: [PATCH 09/27] Add test to show `add_rule!` type coercion bug Currently, the rule `true` is equivalent to the rule `1`. This means that if one of them already exists in the grammar, the new one is skipped. 
--- test/test_cfg.jl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/test_cfg.jl b/test/test_cfg.jl index 4b069b1..bdc7880 100644 --- a/test/test_cfg.jl +++ b/test/test_cfg.jl @@ -86,4 +86,28 @@ rm("toy_pcfg.grammar") end + @testset "Test that strict equality is used during rule creation" begin + g₁ = @csgrammar begin + R = x + R = R + R + end + + add_rule!(g₁, :(R = 1 | 2)) + + add_rule!(g₁,:(Bool = true)) + + @test all(g₁.rules .== [:x, :(R + R), 1, 2, true]) + + g₁ = @csgrammar begin + R = x + R = R + R + end + + add_rule!(g₁,:(Bool = true)) + + add_rule!(g₁, :(R = 1 | 2)) + + @test all(g₁.rules .== [:x, :(R + R), true, 1, 2]) + end + end From 5a031ff34259bda69841fefc736f3c74c72c833a Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Fri, 19 Jan 2024 10:34:16 +0100 Subject: [PATCH 10/27] Use strict equality in `add_rule!` Fixes #47. --- src/grammar_base.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/grammar_base.jl b/src/grammar_base.jl index d7be8f9..23a506c 100644 --- a/src/grammar_base.jl +++ b/src/grammar_base.jl @@ -211,13 +211,15 @@ function add_rule!(g::Grammar, e::Expr) rvec = Any[] parse_rule!(rvec, rule) for r ∈ rvec - if r ∈ g.rules - continue + # Only add a rule if it does not exist yet. Check for existance + # with strict equality so that true and 1 are not considered + # equal. that means we can't use `in` or `∈` for equality checking. 
+ if !any(r === rule for rule ∈ g.rules) + push!(g.rules, r) + push!(g.iseval, iseval(rule)) + push!(g.types, s) + g.bytype[s] = push!(get(g.bytype, s, Int[]), length(g.rules)) end - push!(g.rules, r) - push!(g.iseval, iseval(rule)) - push!(g.types, s) - g.bytype[s] = push!(get(g.bytype, s, Int[]), length(g.rules)) end end alltypes = collect(keys(g.bytype)) From 7d3c15b67a26ce693d76d9ea19c95a6718ec0409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Thu, 18 Jan 2024 11:20:32 +0100 Subject: [PATCH 11/27] merge cfg with csg --- src/HerbGrammar.jl | 12 +-- src/cfg/cfg.jl | 155 ----------------------------------- src/cfg/probabilistic_cfg.jl | 111 ------------------------- src/csg/csg.jl | 94 ++++++++++++++++----- src/csg/probabilistic_csg.jl | 52 +++++++++++- src/grammar_io.jl | 27 +++--- 6 files changed, 141 insertions(+), 310 deletions(-) delete mode 100644 src/cfg/cfg.jl delete mode 100644 src/cfg/probabilistic_cfg.jl diff --git a/src/HerbGrammar.jl b/src/HerbGrammar.jl index 20dce73..02237c0 100644 --- a/src/HerbGrammar.jl +++ b/src/HerbGrammar.jl @@ -12,10 +12,6 @@ include("rulenode_operators.jl") include("utils.jl") include("nodelocation.jl") - -include("cfg/cfg.jl") -include("cfg/probabilistic_cfg.jl") - include("csg/csg.jl") include("csg/probabilistic_csg.jl") @@ -25,7 +21,7 @@ export ContextFree, ContextSensitive, - ContextFreeGrammar, +# ContextFreeGrammar, ContextSensitiveGrammar, AbstractRuleNode, @@ -36,7 +32,7 @@ export ProbabilisticCFG, @cfgrammar, - expr2cfgrammar, +# expr2cfgrammar, max_arity, isterminal, iseval, @@ -56,13 +52,13 @@ export @csgrammar, expr2csgrammar, - cfg2csg, +# cfg2csg, clearconstraints!, addconstraint!, merge_grammars!, @pcfgrammar, - expr2pcfgrammar, +# expr2pcfgrammar, @pcsgrammar, expr2pcsgrammar, diff --git a/src/cfg/cfg.jl b/src/cfg/cfg.jl deleted file mode 100644 index 675ebf5..0000000 --- a/src/cfg/cfg.jl +++ /dev/null @@ -1,155 +0,0 @@ -""" - ContextFreeGrammar <: Grammar - -Represents a context-free 
grammar and its production rules. -Consists of: - -- `rules::Vector{Any}`: A list of RHS of rules (subexpressions). -- `types::Vector{Symbol}`: A list of LHS of rules (types, all symbols). -- `isterminal::BitVector`: A bitvector where bit `i` represents whether rule `i` is terminal. -- `iseval::BitVector`: A bitvector where bit `i` represents whether rule i is an eval rule. -- `bytype::Dict{Symbol,Vector{Int}}`: A dictionary that maps a type to all rules of said type. -- `domains::Dict{Symbol, BitVector}`: A dictionary that maps a type to a domain bitvector. - The domain bitvector has bit `i` set to true iff the `i`th rule is of this type. -- `childtypes::Vector{Vector{Symbol}}`: A list of types of the children for each rule. - If a rule is terminal, the corresponding list is empty. -- `log_probabilities::Union{Vector{Real}, Nothing}`: A list of probabilities for each rule. - If the grammar is non-probabilistic, the list can be `nothing`. - -Use the [`@cfgrammar`](@ref) macro to create a [`ContextFreeGrammar`](@ref) object. -Use the [`@pcfgrammar`](@ref) macro to create a [`ContextFreeGrammar`](@ref) object with probabilities. -For context-sensitive grammars, see [`ContextSensitiveGrammar`](@ref). - -""" -mutable struct ContextFreeGrammar <: Grammar - rules::Vector{Any} # list of RHS of rules (subexpressions) - types::Vector{Union{Symbol, Nothing}} # list of LHS of rules (types, all symbols) - isterminal::BitVector # whether rule i is terminal - iseval::BitVector # whether rule i is an eval rule - bytype::Dict{Symbol,Vector{Int}} # maps type to all rules of said type - domains::Dict{Symbol,BitVector} # maps type to a domain bitvector - childtypes::Vector{Vector{Symbol}} # list of types of the children for each rule. 
Empty if terminal - log_probabilities::Union{Vector{Real}, Nothing} # list of probabilities for the rules if this is a probabilistic grammar -end - -""" - expr2cfgrammar(ex::Expr)::ContextFreeGrammar - -A function for converting an `Expr` to a [`ContextFreeGrammar`](@ref). -If the expression is hardcoded, you should use the [`@cfgrammar`](@ref) macro. -Only expressions in the correct format (see [`@cfgrammar`](@ref)) can be converted. - -### Example usage: - -```@example -grammar = expr2cfgrammar( - begin - R = x - R = 1 | 2 - R = R + R - end -) -``` -""" -function expr2cfgrammar(ex::Expr)::ContextFreeGrammar - rules = Any[] - types = Symbol[] - bytype = Dict{Symbol,Vector{Int}}() - for e ∈ ex.args - if isa(e, Expr) - if e.head == :(=) - s = e.args[1] # name of return type - rule = e.args[2] # expression? - rvec = Any[] - parse_rule!(rvec, rule) - for r ∈ rvec - push!(rules, r) - push!(types, s) - bytype[s] = push!(get(bytype, s, Int[]), length(rules)) - end - end - end - end - alltypes = collect(keys(bytype)) - is_terminal = [isterminal(rule, alltypes) for rule ∈ rules] - is_eval = [iseval(rule) for rule ∈ rules] - childtypes = [get_childtypes(rule, alltypes) for rule ∈ rules] - domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) - return ContextFreeGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, nothing) -end - -""" - @cfgrammar - -A macro for defining a [`ContextFreeGrammar`](@ref). - -### Example usage: -```julia -grammar = @cfgrammar begin - R = x - R = 1 | 2 - R = R + R -end -``` - -### Syntax: - -- Literals: Symbols that are already defined in Julia are considered literals, such as `1`, `2`, or `π`. - For example: `R = 1`. -- Variables: A variable is a symbol that is not a nonterminal symbol and not already defined in Julia. - For example: `R = x`. -- Functions: Functions and infix operators that are defined in Julia or the `Main` module can be used - with the default evaluator. 
For example: `R = R + R`, `R = f(a, b)`. -- Combinations: Multiple rules can be defined on a single line in the grammar definition using the `|` symbol. - For example: `R = 1 | 2 | 3`. -- Iterators: Another way to define multiple rules is by providing a Julia iterator after a `|` symbol. - For example: `R = |(1:9)`. - -### Related: - -- [`@csgrammar`](@ref) uses the same syntax to create [`ContextSensitiveGrammar`](@ref)s. -- [`@pcfgrammar`](@ref) uses a similar syntax to create probabilistic [`ContextFreeGrammar`](@ref)s. -""" -macro cfgrammar(ex) - return expr2cfgrammar(ex) -end - -parse_rule!(v::Vector{Any}, r) = push!(v, r) - -function parse_rule!(v::Vector{Any}, ex::Expr) - # Strips `LineNumberNode`s from the expression - Base.remove_linenums!(ex) - - if ex.head == :call && ex.args[1] == :| - terms = _expand_shorthand(ex.args) - - for t in terms - parse_rule!(v, t) - end - else - push!(v, ex) - end -end - -function _expand_shorthand(args::Vector{Any}) - # expand a rule using the `|` symbol: - # `X = |(1:3)`, `X = 1|2|3`, `X = |([1,2,3])` - # these should all be equivalent and should expand to - # the following 3 rules: `X = 1`, `X = 2`, and `X = 3` - if args[1] != :| - throw(ArgumentError("Tried to parse: $ex as a shorthand rule, but it is not a shorthand rule.")) - end - - if length(args) == 2 - to_expand = args[2] - if to_expand.args[1] == :(:) - expanded = collect(to_expand.args[2]:to_expand.args[3]) # (1:3) case - else - expanded = to_expand.args # ([1,2,3]) case - end - elseif length(args) == 3 - expanded = args[2:end] # 1|2|3 case - else - throw(ArgumentError("Failed to parse shorthand for rule: $ex")) - end -end diff --git a/src/cfg/probabilistic_cfg.jl b/src/cfg/probabilistic_cfg.jl deleted file mode 100644 index 0a180e4..0000000 --- a/src/cfg/probabilistic_cfg.jl +++ /dev/null @@ -1,111 +0,0 @@ -""" -Function for converting an `Expr` to a [`ContextFreeGrammar`](@ref) with probabilities. 
-If the expression is hardcoded, you should use the `@pcfgrammar` macro. -Only expressions in the correct format (see [`@pcfgrammar`](@ref)) can be converted. - -### Example usage: - -```@example -grammar = expr2pcsgrammar( - begin - 0.5 : R = x - 0.3 : R = 1 | 2 - 0.2 : R = R + R - end -) -``` -""" -function expr2pcfgrammar(ex::Expr)::ContextFreeGrammar - rules = Any[] - types = Symbol[] - probabilities = Real[] - bytype = Dict{Symbol,Vector{Int}}() - for e ∈ ex.args - if e isa Expr - if e.head == :(=) - left = e.args[1] # name of return type and probability - if left isa Expr && left.head == :call && left.args[1] == :(:) - p = left.args[2] # Probability - s = left.args[3] # Return type - rule = e.args[2].args[2] # extract rule from block expr - - rvec = Any[] - parse_rule!(rvec, rule) - for r ∈ rvec - push!(rules, r) - push!(types, s) - # Divide the probability of this line by the number of rules it defines. - push!(probabilities, p / length(rvec)) - bytype[s] = push!(get(bytype, s, Int[]), length(rules)) - end - else - @error "Rule without probability encountered in probabilistic grammar. Rule ignored." - end - end - end - end - alltypes = collect(keys(bytype)) - # Normalize probabilities for each type - for t ∈ alltypes - total_prob = sum(probabilities[i] for i ∈ bytype[t]) - if !(total_prob ≈ 1) - @warn "The probabilities for type $t don't add up to 1, so they will be normalized." 
- for i ∈ bytype[t] - probabilities[i] /= total_prob - end - end - end - - log_probabilities = [log(x) for x ∈ probabilities] - is_terminal = [isterminal(rule, alltypes) for rule in rules] - is_eval = [iseval(rule) for rule in rules] - childtypes = [get_childtypes(rule, alltypes) for rule in rules] - domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) - return ContextFreeGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, log_probabilities) -end - - -""" - @pcfgrammar - -A macro for defining a probabilistic [`ContextFreeGrammar`](@ref). - -### Example usage: -```julia -grammar = @pcfgrammar begin - 0.5 : R = x - 0.3 : R = 1 | 2 - 0.2 : R = R + R -end -``` - -### Syntax: - -The syntax of rules is identical to the syntax used by [`@cfgrammar`](@ref): - -- Literals: Symbols that are already defined in Julia are considered literals, such as `1`, `2`, or `π`. - For example: `R = 1`. -- Variables: A variable is a symbol that is not a nonterminal symbol and not already defined in Julia. - For example: `R = x`. -- Functions: Functions and infix operators that are defined in Julia or the `Main` module can be used - with the default evaluator. For example: `R = R + R`, `R = f(a, b)`. -- Combinations: Multiple rules can be defined on a single line in the grammar definition using the `|` symbol. - For example: `R = 1 | 2 | 3`. -- Iterators: Another way to define multiple rules is by providing a Julia iterator after a `|` symbol. - For example: `R = |(1:9)`. - -Every rule is also prefixed with a probability. -Rules and probabilities are separated using the `:` symbol. -If multiple rules are defined on a single line, the probability is equally divided between the rules. -The sum of probabilities for all rules of a certain non-terminal symbol should be equal to 1. -The probabilities are automatically scaled if this isn't the case. 
- - -### Related: - -- [`@pcsgrammar`](@ref) uses the same syntax to create probabilistic [`ContextSensitiveGrammar`](@ref)s. -- [`@cfgrammar`](@ref) uses a similar syntax to create non-probabilistic [`ContextFreeGrammar`](@ref)s. -""" -macro pcfgrammar(ex) - return expr2pcfgrammar(ex) -end \ No newline at end of file diff --git a/src/csg/csg.jl b/src/csg/csg.jl index 18081c9..74c23e7 100644 --- a/src/csg/csg.jl +++ b/src/csg/csg.jl @@ -21,7 +21,6 @@ Consists of: Use the [`@csgrammar`](@ref) macro to create a [`ContextSensitiveGrammar`](@ref) object. Use the [`@pcsgrammar`](@ref) macro to create a [`ContextSensitiveGrammar`](@ref) object with probabilities. -For context-free grammars, see [`ContextFreeGrammar`](@ref). """ mutable struct ContextSensitiveGrammar <: Grammar rules::Vector{Any} @@ -35,6 +34,16 @@ mutable struct ContextSensitiveGrammar <: Grammar constraints::Vector{Constraint} end +ContextSensitiveGrammar( + rules::Vector{<:Any}, + types::Vector{<:Union{Symbol, Nothing}}, + isterminal::Union{BitVector, Vector{Bool}}, + iseval::Union{BitVector, Vector{Bool}}, + bytype::Dict{Symbol, Vector{Int}}, + domains::Dict{Symbol, BitVector}, + childtypes::Vector{Vector{Symbol}}, + log_probabilities::Union{Vector{<:Real}, Nothing} +) = ContextSensitiveGrammar(rules, types, isterminal, iseval, bytype, domains, childtypes, log_probabilities, Constraint[]) """ expr2csgrammar(ex::Expr)::ContextSensitiveGrammar @@ -56,7 +65,30 @@ grammar = expr2csgrammar( ``` """ function expr2csgrammar(ex::Expr)::ContextSensitiveGrammar - return cfg2csg(expr2cfgrammar(ex)) + rules = Any[] + types = Symbol[] + bytype = Dict{Symbol,Vector{Int}}() + for e ∈ ex.args + if isa(e, Expr) + if e.head == :(=) + s = e.args[1] # name of return type + rule = e.args[2] # expression? 
+ rvec = Any[] + parse_rule!(rvec, rule) + for r ∈ rvec + push!(rules, r) + push!(types, s) + bytype[s] = push!(get(bytype, s, Int[]), length(rules)) + end + end + end + end + alltypes = collect(keys(bytype)) + is_terminal = [isterminal(rule, alltypes) for rule ∈ rules] + is_eval = [iseval(rule) for rule ∈ rules] + childtypes = [get_childtypes(rule, alltypes) for rule ∈ rules] + domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) + return ContextSensitiveGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, nothing) end @@ -91,30 +123,54 @@ end ### Related: -- [`@cfgrammar`](@ref) uses the same syntax to create [`ContextFreeGrammar`](@ref)s. - [`@pcsgrammar`](@ref) uses a similar syntax to create probabilistic [`ContextSensitiveGrammar`](@ref)s. """ macro csgrammar(ex) return expr2csgrammar(ex) end -""" - cfg2csg(g::ContextFreeGrammar)::ContextSensitiveGrammar +macro cfgrammar(ex) + return expr2csgrammar(ex) +end -Converts a [`ContextFreeGrammar`](@ref) to a [`ContextSensitiveGrammar`](@ref) without any [`Constraint`](@ref)s. 
-""" -function cfg2csg(g::ContextFreeGrammar)::ContextSensitiveGrammar - return ContextSensitiveGrammar( - g.rules, - g.types, - g.isterminal, - g.iseval, - g.bytype, - g.domains, - g.childtypes, - g.log_probabilities, - [] - ) +parse_rule!(v::Vector{Any}, r) = push!(v, r) + +function parse_rule!(v::Vector{Any}, ex::Expr) + # Strips `LineNumberNode`s from the expression + Base.remove_linenums!(ex) + + if ex.head == :call && ex.args[1] == :| + terms = _expand_shorthand(ex.args) + + for t in terms + parse_rule!(v, t) + end + else + push!(v, ex) + end +end + +function _expand_shorthand(args::Vector{Any}) + # expand a rule using the `|` symbol: + # `X = |(1:3)`, `X = 1|2|3`, `X = |([1,2,3])` + # these should all be equivalent and should expand to + # the following 3 rules: `X = 1`, `X = 2`, and `X = 3` + if args[1] != :| + throw(ArgumentError("Tried to parse: $ex as a shorthand rule, but it is not a shorthand rule.")) + end + + if length(args) == 2 + to_expand = args[2] + if to_expand.args[1] == :(:) + expanded = collect(to_expand.args[2]:to_expand.args[3]) # (1:3) case + else + expanded = to_expand.args # ([1,2,3]) case + end + elseif length(args) == 3 + expanded = args[2:end] # 1|2|3 case + else + throw(ArgumentError("Failed to parse shorthand for rule: $ex")) + end end """ diff --git a/src/csg/probabilistic_csg.jl b/src/csg/probabilistic_csg.jl index 3610b00..20ccc7a 100644 --- a/src/csg/probabilistic_csg.jl +++ b/src/csg/probabilistic_csg.jl @@ -17,7 +17,52 @@ grammar = expr2pcsgrammar( ``` """ function expr2pcsgrammar(ex::Expr)::ContextSensitiveGrammar - cfg2csg(expr2pcfgrammar(ex)) + rules = Any[] + types = Symbol[] + probabilities = Real[] + bytype = Dict{Symbol,Vector{Int}}() + for e ∈ ex.args + if e isa Expr + if e.head == :(=) + left = e.args[1] # name of return type and probability + if left isa Expr && left.head == :call && left.args[1] == :(:) + p = left.args[2] # Probability + s = left.args[3] # Return type + rule = e.args[2].args[2] # extract rule from 
block expr + + rvec = Any[] + parse_rule!(rvec, rule) + for r ∈ rvec + push!(rules, r) + push!(types, s) + # Divide the probability of this line by the number of rules it defines. + push!(probabilities, p / length(rvec)) + bytype[s] = push!(get(bytype, s, Int[]), length(rules)) + end + else + @error "Rule without probability encountered in probabilistic grammar. Rule ignored." + end + end + end + end + alltypes = collect(keys(bytype)) + # Normalize probabilities for each type + for t ∈ alltypes + total_prob = sum(probabilities[i] for i ∈ bytype[t]) + if !(total_prob ≈ 1) + @warn "The probabilities for type $t don't add up to 1, so they will be normalized." + for i ∈ bytype[t] + probabilities[i] /= total_prob + end + end + end + + log_probabilities = [log(x) for x ∈ probabilities] + is_terminal = [isterminal(rule, alltypes) for rule in rules] + is_eval = [iseval(rule) for rule in rules] + childtypes = [get_childtypes(rule, alltypes) for rule in rules] + domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) + return ContextSensitiveGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, log_probabilities) end """ @@ -58,9 +103,12 @@ The probabilities are automatically scaled if this isn't the case. ### Related: -- [`@pcfgrammar`](@ref) uses the same syntax to create probabilistic [`ContextFreeGrammar`](@ref)s. - [`@csgrammar`](@ref) uses a similar syntax to create non-probabilistic [`ContextSensitiveGrammar`](@ref)s. 
""" macro pcsgrammar(ex) return expr2pcsgrammar(ex) +end + +macro pcfgrammar(ex) + return expr2pcsgrammar(ex) end \ No newline at end of file diff --git a/src/grammar_io.jl b/src/grammar_io.jl index 9ba028d..fc30452 100644 --- a/src/grammar_io.jl +++ b/src/grammar_io.jl @@ -1,9 +1,9 @@ """ - store_cfg(filepath::AbstractString, grammar::ContextFreeGrammar) + store_cfg(filepath::AbstractString, grammar::ContextSensitiveGrammar) -Writes a [`ContextFreeGrammar`](@ref) to the file provided by `filepath`. +Writes the context free part of a [`ContextSensitiveGrammar`](@ref) to the file provided by `filepath`. """ -function store_cfg(filepath::AbstractString, grammar::ContextFreeGrammar) +function store_cfg(filepath::AbstractString, grammar::ContextSensitiveGrammar) open(filepath, write=true) do file if !isprobabilistic(grammar) for (type, rule) ∈ zip(grammar.types, grammar.rules) @@ -19,15 +19,15 @@ end """ - read_cfg(filepath::AbstractString)::ContextFreeGrammar + read_cfg(filepath::AbstractString)::ContextSensitiveGrammar -Reads a [`ContextFreeGrammar`](@ref) from the file provided in `filepath`. +Reads the context free part of a [`ContextSensitiveGrammar`](@ref) from the file provided in `filepath`. !!! danger Only open trusted grammars. Parts of the grammar can be passed to Julia's `eval` function. 
""" -function read_cfg(filepath::AbstractString)::ContextFreeGrammar +function read_cfg(filepath::AbstractString)::ContextSensitiveGrammar # Read the contents of the file into a string file = open(filepath) program::AbstractString = read(file, String) @@ -37,19 +37,19 @@ function read_cfg(filepath::AbstractString)::ContextFreeGrammar ex::Expr = Meta.parse("begin $program end") # Convert the expression to a context-free grammar - return expr2cfgrammar(ex) + return expr2csgrammar(ex) end """ - read_pcfg(filepath::AbstractString)::ContextFreeGrammar + read_pcfg(filepath::AbstractString)::ContextSensitiveGrammar -Reads a probabilistic [`ContextFreeGrammar`](@ref) from a file provided in `filepath`. +Reads the context free part of a probabilistic [`ContextSensitiveGrammar`](@ref) from a file provided in `filepath`. !!! danger Only open trusted grammars. Parts of the grammar can be passed to Julia's `eval` function. """ -function read_pcfg(filepath::AbstractString)::ContextFreeGrammar +function read_pcfg(filepath::AbstractString)::ContextSensitiveGrammar # Read the contents of the file into a string file = open(filepath) program::AbstractString = read(file, String) @@ -59,7 +59,7 @@ function read_pcfg(filepath::AbstractString)::ContextFreeGrammar ex::Expr = Meta.parse("begin $program end") # Convert the expression to a context-free grammar - return expr2pcfgrammar(ex) + return expr2pcsgrammar(ex) end """ @@ -71,8 +71,7 @@ The `grammarpath` file will contain a [`ContextSensitiveGrammar`](@ref) definiti """ function store_csg(grammarpath::AbstractString, constraintspath::AbstractString, g::ContextSensitiveGrammar) # Store grammar as CFG - store_cfg(grammarpath, ContextFreeGrammar(g.rules, g.types, - g.isterminal, g.iseval, g.bytype, g.domains, g.childtypes, g.log_probabilities)) + store_cfg(grammarpath, g) # Store constraints separately open(constraintspath, write=true) do file @@ -84,7 +83,6 @@ end read_csg(grammarpath::AbstractString, 
constraintspath::AbstractString)::ContextSensitiveGrammar Reads a [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. -The grammar path may also point to a [`ContextFreeGrammar`](@ref). !!! danger Only open trusted grammars. @@ -104,7 +102,6 @@ end read_pcsg(grammarpath::AbstractString, constraintspath::AbstractString)::ContextSensitiveGrammar Reads a probabilistic [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. -The grammar path may also point to a [`ContextFreeGrammar`](@ref). !!! danger Only open trusted grammars. From 6081a746b1817825f369fc7554a7e56b033f4258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Thu, 18 Jan 2024 11:25:13 +0100 Subject: [PATCH 12/27] remove commented exports --- src/HerbGrammar.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/HerbGrammar.jl b/src/HerbGrammar.jl index 02237c0..a62c5d9 100644 --- a/src/HerbGrammar.jl +++ b/src/HerbGrammar.jl @@ -21,8 +21,6 @@ export ContextFree, ContextSensitive, -# ContextFreeGrammar, - ContextSensitiveGrammar, AbstractRuleNode, RuleNode, @@ -32,7 +30,6 @@ export ProbabilisticCFG, @cfgrammar, -# expr2cfgrammar, max_arity, isterminal, iseval, @@ -52,13 +49,11 @@ export @csgrammar, expr2csgrammar, -# cfg2csg, clearconstraints!, addconstraint!, merge_grammars!, @pcfgrammar, -# expr2pcfgrammar, @pcsgrammar, expr2pcsgrammar, From 318cc98a1e0d46f650bb29f31c4187f9f6c594c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Thu, 18 Jan 2024 16:12:50 +0100 Subject: [PATCH 13/27] factor out parsing and normalization of PCSG --- src/csg/probabilistic_csg.jl | 84 +++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/src/csg/probabilistic_csg.jl b/src/csg/probabilistic_csg.jl index 20ccc7a..1dcd95a 100644 --- a/src/csg/probabilistic_csg.jl +++ b/src/csg/probabilistic_csg.jl @@ -22,26 +22,16 @@ function 
expr2pcsgrammar(ex::Expr)::ContextSensitiveGrammar probabilities = Real[] bytype = Dict{Symbol,Vector{Int}}() for e ∈ ex.args - if e isa Expr - if e.head == :(=) - left = e.args[1] # name of return type and probability - if left isa Expr && left.head == :call && left.args[1] == :(:) - p = left.args[2] # Probability - s = left.args[3] # Return type - rule = e.args[2].args[2] # extract rule from block expr - - rvec = Any[] - parse_rule!(rvec, rule) - for r ∈ rvec - push!(rules, r) - push!(types, s) - # Divide the probability of this line by the number of rules it defines. - push!(probabilities, p / length(rvec)) - bytype[s] = push!(get(bytype, s, Int[]), length(rules)) - end - else - @error "Rule without probability encountered in probabilistic grammar. Rule ignored." - end + if e isa Expr + maybe_rules = parse_probabilistic_rule(e) + isnothing(maybe_rules) && continue # if rules is nothing, skip + s, prvec = maybe_rules + + for (p, r) ∈ prvec + push!(rules, r) + push!(types, s) + push!(probabilities, p) + bytype[s] = push!(get(bytype, s, Int[]), length(rules)) end end end @@ -62,7 +52,59 @@ function expr2pcsgrammar(ex::Expr)::ContextSensitiveGrammar is_eval = [iseval(rule) for rule in rules] childtypes = [get_childtypes(rule, alltypes) for rule in rules] domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) - return ContextSensitiveGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, log_probabilities) + + normalize!(ContextSensitiveGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, log_probabilities)) +end + +""" +Parses a single (potentially shorthand) derivation rule of a probabilistic [`ContextSensitiveGrammar`](@ref). +Returns `nothing` if the rule is not probabilistic, otherwise a `Tuple` of its type and a +`Vector` of probability-rule pairs it expands into. 
+""" +function parse_probabilistic_rule(e::Expr)::Union{Nothing, Tuple{Symbol, Vector{Tuple{Real, Any}}}} + prvec = Tuple{Real, Any}[] + if e.head == :(=) + left = e.args[1] # name of return type and probability + if left isa Expr && left.head == :call && left.args[1] == :(:) + p = left.args[2] # Probability + s = left.args[3] # Return type + rule = e.args[2].args[2] # extract rule from block expr + + rvec = Any[] + parse_rule!(rvec, rule) + for r ∈ rvec + # Divide the probability of this line by the number of rules it defines. + push!(prvec, (p / length(rvec), r)) + end + + return s, prvec + else + @error "Rule without probability encountered in probabilistic grammar. Rule ignored." + return nothing + end + end +end + + +""" +A function for normalizing the probabilities of a probabilistic [`ContextSensitiveGrammar`](@ref). +If the optional `type` argument is provided, only the rules of that type are normalized. +""" +function normalize!(g::ContextSensitiveGrammar, type::Union{Symbol, Nothing}=nothing) + probabilities = map(exp, g.log_probabilities) + types = isnothing(type) ? keys(g.bytype) : [type] + + for t ∈ types + total_prob = sum(probabilities[i] for i ∈ g.bytype[t]) + if !(total_prob ≈ 1) + for i ∈ g.bytype[t] + probabilities[i] /= total_prob + end + end + end + + g.log_probabilities = map(log, probabilities) + return g end """ From 12573f0cde1433e3061d5c4e89b486d95859adb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Thu, 18 Jan 2024 16:16:44 +0100 Subject: [PATCH 14/27] add add_rule! for probabilistic grammar --- src/grammar_base.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/grammar_base.jl b/src/grammar_base.jl index c18c041..9a17a10 100644 --- a/src/grammar_base.jl +++ b/src/grammar_base.jl @@ -234,6 +234,18 @@ function add_rule!(g::Grammar, e::Expr) return g end +""" +Adds a probabilistic derivation rule. 
+""" +function add_rule!(g::Grammar, p::Real, e::Expr) + isprobabilistic(g) || throw(ArgumentError("adding a probabilistic rule to a non-probabilistic grammar")) + len₀ = length(g.rules) + add_rule!(g, e) + len₁ = length(g.rules) + nnew = len₁ - len₀ + append!(g.log_probabilities, repeat([log(p / nnew)], nnew)) + normalize!(g) +end """ remove_rule!(g::Grammar, idx::Int) From ee5a4227678fe5632d0a109e407abbb7a4c15da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Thu, 18 Jan 2024 16:17:00 +0100 Subject: [PATCH 15/27] add tests for probabilistic grammar --- test/runtests.jl | 2 +- test/{test_cfg.jl => test_csg.jl} | 46 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) rename test/{test_cfg.jl => test_csg.jl} (69%) diff --git a/test/runtests.jl b/test/runtests.jl index 60de23b..f4a7f0a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,6 @@ using HerbGrammar using Test @testset "HerbGrammar.jl" verbose=true begin - include("test_cfg.jl") + include("test_csg.jl") include("test_rulenode_operators.jl") end diff --git a/test/test_cfg.jl b/test/test_csg.jl similarity index 69% rename from test/test_cfg.jl rename to test/test_csg.jl index 691c019..0138e0a 100644 --- a/test/test_cfg.jl +++ b/test/test_csg.jl @@ -103,6 +103,52 @@ rm("toy_pcfg.grammar") end + @testset "creating probabilistic CFG" begin + g = @pcfgrammar begin + 0.5 : R = |(0:2) + 0.3 : R = x + 0.2 : B = true | false + end + + @test sum(map(exp, g.log_probabilities[g.bytype[:R]])) ≈ 1.0 + @test sum(map(exp, g.log_probabilities[g.bytype[:B]])) ≈ 1.0 + @test g.bytype[:R] == Int[1,2,3,4] + @test g.bytype[:B] == Int[5,6] + @test :R ∈ g.types && :B ∈ g.types + end + + @testset "creating a non-normalized PCFG" begin + g = @pcfgrammar begin + 0.5 : R = |(0:2) + 0.5 : R = x + 0.5 : B = true | false + end + + @test sum(map(exp, g.log_probabilities[g.bytype[:R]])) ≈ 1.0 + @test sum(map(exp, g.log_probabilities[g.bytype[:B]])) ≈ 1.0 + @test g.rules == [0, 1, 
2, :x, :true, :false] + @test g.bytype[:R] == Int[1,2,3,4] + @test g.bytype[:B] == Int[5,6] + @test :R ∈ g.types && :B ∈ g.types + end + + @testset "Adding a rule to a probabilistic CSG" begin + g = @pcsgrammar begin + 0.5 : R = x + 0.5 : R = R + R + end + + add_rule!(g, 0.5, :(R = 1 | 2)) + + @test g.rules == [:x, :(R + R), 1, 2] + + add_rule!(g, 0.5, :(B = t | f)) + + @test g.bytype[:B] == Int[5, 6] + @test sum(map(exp, g.log_probabilities[g.bytype[:R]])) ≈ 1.0 + @test sum(map(exp, g.log_probabilities[g.bytype[:B]])) ≈ 1.0 + end + @testset "Test that strict equality is used during rule creation" begin g₁ = @csgrammar begin R = x From d4031eda03105cb1c8950ba6e290e4c4b09d793b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Fri, 19 Jan 2024 10:12:58 +0100 Subject: [PATCH 16/27] remove non existent export --- src/HerbGrammar.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/HerbGrammar.jl b/src/HerbGrammar.jl index a62c5d9..c91d0c9 100644 --- a/src/HerbGrammar.jl +++ b/src/HerbGrammar.jl @@ -27,8 +27,6 @@ export Hole, NodeLoc, - ProbabilisticCFG, - @cfgrammar, max_arity, isterminal, From 952abfeb4dbbb0815f79d150b67efc4d3d079de5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Fri, 19 Jan 2024 10:14:03 +0100 Subject: [PATCH 17/27] merge grammar_io functions and adapt the respective tests --- src/HerbGrammar.jl | 3 -- src/grammar_io.jl | 109 ++++++++++++++++++--------------------------- test/test_csg.jl | 36 +++++++-------- 3 files changed, 62 insertions(+), 86 deletions(-) diff --git a/src/HerbGrammar.jl b/src/HerbGrammar.jl index c91d0c9..927b56f 100644 --- a/src/HerbGrammar.jl +++ b/src/HerbGrammar.jl @@ -67,9 +67,6 @@ export containedin, subsequenceof, has_children, - store_cfg, - read_cfg, - read_pcfg, store_csg, read_csg, read_pcsg, diff --git a/src/grammar_io.jl b/src/grammar_io.jl index fc30452..39867a9 100644 --- a/src/grammar_io.jl +++ b/src/grammar_io.jl @@ -1,9 +1,14 @@ +const OptionalPath = Union{Nothing, 
AbstractString} + """ - store_cfg(filepath::AbstractString, grammar::ContextSensitiveGrammar) + store_csg(g::ContextSensitiveGrammar, grammarpath::AbstractString, constraintspath::OptionalPath=nothing) -Writes the context free part of a [`ContextSensitiveGrammar`](@ref) to the file provided by `filepath`. +Writes a [`ContextSensitiveGrammar`](@ref) to the files at `grammarpath` and `constraintspath`. +The `grammarpath` file will contain a [`ContextSensitiveGrammar`](@ref) definition, and the +`constraintspath` file will contain the [`Constraint`](@ref)s of the [`ContextSensitiveGrammar`](@ref). """ -function store_cfg(filepath::AbstractString, grammar::ContextSensitiveGrammar) +function store_csg(grammar::ContextSensitiveGrammar, filepath::AbstractString, constraintspath::OptionalPath=nothing) + # Store grammar as CFG open(filepath, write=true) do file if !isprobabilistic(grammar) for (type, rule) ∈ zip(grammar.types, grammar.rules) @@ -15,21 +20,28 @@ function store_cfg(filepath::AbstractString, grammar::ContextSensitiveGrammar) end end end -end + + # exit if no constraintspath is given + isnothing(constraintspath) && return + # Store constraints separately + open(constraintspath, write=true) do file + serialize(file, grammar.constraints) + end +end """ - read_cfg(filepath::AbstractString)::ContextSensitiveGrammar + read_csg(grammarpath::AbstractString, constraintspath::OptionalPath=nothing)::ContextSensitiveGrammar -Reads the context free part of a [`ContextSensitiveGrammar`](@ref) from the file provided in `filepath`. +Reads a [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. !!! danger Only open trusted grammars. Parts of the grammar can be passed to Julia's `eval` function. 
""" -function read_cfg(filepath::AbstractString)::ContextSensitiveGrammar +function read_csg(grammarpath::AbstractString, constraintspath::OptionalPath=nothing)::ContextSensitiveGrammar # Read the contents of the file into a string - file = open(filepath) + file = open(grammarpath) program::AbstractString = read(file, String) close(file) @@ -37,21 +49,32 @@ function read_cfg(filepath::AbstractString)::ContextSensitiveGrammar ex::Expr = Meta.parse("begin $program end") # Convert the expression to a context-free grammar - return expr2csgrammar(ex) + g = expr2csgrammar(ex) + + if !isnothing(constraintspath) + file = open(constraintspath) + constraints = deserialize(file) + close(file) + else + constraints = Constraint[] + end + + return ContextSensitiveGrammar(g.rules, g.types, g.isterminal, + g.iseval, g.bytype, g.domains, g.childtypes, g.log_probabilities, constraints) end """ - read_pcfg(filepath::AbstractString)::ContextSensitiveGrammar + read_pcsg(grammarpath::AbstractString, constraintspath::OptionalPath=nothing)::ContextSensitiveGrammar -Reads the context free part of a probabilistic [`ContextSensitiveGrammar`](@ref) from a file provided in `filepath`. +Reads a probabilistic [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. !!! danger Only open trusted grammars. Parts of the grammar can be passed to Julia's `eval` function. 
""" -function read_pcfg(filepath::AbstractString)::ContextSensitiveGrammar +function read_pcsg(grammarpath::AbstractString, constraintspath::OptionalPath=nothing)::ContextSensitiveGrammar # Read the contents of the file into a string - file = open(filepath) + file = open(grammarpath) program::AbstractString = read(file, String) close(file) @@ -59,60 +82,16 @@ function read_pcfg(filepath::AbstractString)::ContextSensitiveGrammar ex::Expr = Meta.parse("begin $program end") # Convert the expression to a context-free grammar - return expr2pcsgrammar(ex) -end - -""" - store_csg(grammarpath::AbstractString, constraintspath::AbstractString, g::ContextSensitiveGrammar) - -Writes a [`ContextSensitiveGrammar`](@ref) to the files at `grammarpath` and `constraintspath`. -The `grammarpath` file will contain a [`ContextSensitiveGrammar`](@ref) definition, and the -`constraintspath` file will contain the [`Constraint`](@ref)s of the [`ContextSensitiveGrammar`](@ref). -""" -function store_csg(grammarpath::AbstractString, constraintspath::AbstractString, g::ContextSensitiveGrammar) - # Store grammar as CFG - store_cfg(grammarpath, g) + g = expr2pcsgrammar(ex) - # Store constraints separately - open(constraintspath, write=true) do file - serialize(file, g.constraints) + if !isnothing(constraintspath) + file = open(constraintspath) + constraints = deserialize(file) + close(file) + else + constraints = Constraint[] end -end - -""" - read_csg(grammarpath::AbstractString, constraintspath::AbstractString)::ContextSensitiveGrammar - -Reads a [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. - -!!! danger - Only open trusted grammars. - Parts of the grammar can be passed to Julia's `eval` function. 
-""" -function read_csg(grammarpath::AbstractString, constraintspath::AbstractString)::ContextSensitiveGrammar - g = read_cfg(grammarpath) - file = open(constraintspath) - constraints = deserialize(file) - close(file) - - return ContextSensitiveGrammar(g.rules, g.types, g.isterminal, - g.iseval, g.bytype, g.domains, g.childtypes, g.log_probabilities, constraints) -end - -""" - read_pcsg(grammarpath::AbstractString, constraintspath::AbstractString)::ContextSensitiveGrammar - -Reads a probabilistic [`ContextSensitiveGrammar`](@ref) from the files at `grammarpath` and `constraintspath`. - -!!! danger - Only open trusted grammars. - Parts of the grammar can be passed to Julia's `eval` function. -""" -function read_pcsg(grammarpath::AbstractString, constraintspath::AbstractString)::ContextSensitiveGrammar - g = read_pcfg(grammarpath) - file = open(constraintspath) - constraints = deserialize(file) - close(file) - + return ContextSensitiveGrammar(g.rules, g.types, g.isterminal, g.iseval, g.bytype, g.domains, g.childtypes, g.log_probabilities, constraints) end diff --git a/test/test_csg.jl b/test/test_csg.jl index 0138e0a..4e54f14 100644 --- a/test/test_csg.jl +++ b/test/test_csg.jl @@ -1,17 +1,17 @@ -@testset verbose=true "CFGs" begin +@testset verbose=true "CSGs" begin @testset "Creating grammars" begin - g₁ = @cfgrammar begin + g₁ = @csgrammar begin Real = |(1:9) end @test g₁.rules == collect(1:9) @test :Real ∈ g₁.types - g₂ = @cfgrammar begin + g₂ = @csgrammar begin Real = |([1,2,3]) end @test g₂.rules == [1,2,3] - g₃ = @cfgrammar begin + g₃ = @csgrammar begin Real = 1 | 2 | 3 end @test g₃.rules == [1,2,3] @@ -19,7 +19,7 @@ @testset "Adding rules to grammar" begin - g₁ = @cfgrammar begin + g₁ = @csgrammar begin Real = |(1:2) end @@ -40,7 +40,7 @@ @test g₁.rules == collect(1:9) # Adding other types - g₂ = @cfgrammar begin + g₂ = @csgrammar begin Real = 1 | 2 | 3 end @@ -71,14 +71,14 @@ @test :Real ∈ g₁.types end - @testset "Writing and loading CFG to/from disk" begin - 
g₁ = @cfgrammar begin + @testset "Writing and loading CSG to/from disk" begin + g₁ = @csgrammar begin Real = |(1:5) Real = 6 | 7 | 8 end - store_cfg("toy_cfg.grammar", g₁) - g₂ = read_cfg("toy_cfg.grammar") + store_csg(g₁, "toy_cfg.grammar") + g₂ = read_csg("toy_cfg.grammar") @test :Real ∈ g₂.types @test g₂.rules == collect(1:8) @@ -86,14 +86,14 @@ rm("toy_cfg.grammar") end - @testset "Writing and loading probabilistic CFG to/from disk" begin - g₁ = @pcfgrammar begin + @testset "Writing and loading probabilistic CSG to/from disk" begin + g₁ = @pcsgrammar begin 0.5 : Real = |(0:3) 0.5 : Real = x end - store_cfg("toy_pcfg.grammar", g₁) - g₂ = read_pcfg("toy_pcfg.grammar") + store_csg(g₁, "toy_pcfg.grammar") + g₂ = read_pcsg("toy_pcfg.grammar") @test :Real ∈ g₂.types @test g₂.rules == [0, 1, 2, 3, :x] @test g₂.log_probabilities == g₁.log_probabilities @@ -103,8 +103,8 @@ rm("toy_pcfg.grammar") end - @testset "creating probabilistic CFG" begin - g = @pcfgrammar begin + @testset "creating probabilistic CSG" begin + g = @pcsgrammar begin 0.5 : R = |(0:2) 0.3 : R = x 0.2 : B = true | false @@ -117,8 +117,8 @@ @test :R ∈ g.types && :B ∈ g.types end - @testset "creating a non-normalized PCFG" begin - g = @pcfgrammar begin + @testset "creating a non-normalized PCSG" begin + g = @pcsgrammar begin 0.5 : R = |(0:2) 0.5 : R = x 0.5 : B = true | false From ee101d00afb61468f24e0533ab4452a0b7589ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Fri, 19 Jan 2024 10:23:31 +0100 Subject: [PATCH 18/27] remove the ugly return type annotation --- src/csg/probabilistic_csg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csg/probabilistic_csg.jl b/src/csg/probabilistic_csg.jl index 1dcd95a..a8c6522 100644 --- a/src/csg/probabilistic_csg.jl +++ b/src/csg/probabilistic_csg.jl @@ -61,7 +61,7 @@ Parses a single (potentially shorthand) derivation rule of a probabilistic [`Con Returns `nothing` if the rule is not probabilistic, otherwise a `Tuple` of 
its type and a `Vector` of probability-rule pairs it expands into. """ -function parse_probabilistic_rule(e::Expr)::Union{Nothing, Tuple{Symbol, Vector{Tuple{Real, Any}}}} +function parse_probabilistic_rule(e::Expr) prvec = Tuple{Real, Any}[] if e.head == :(=) left = e.args[1] # name of return type and probability From 13f0c72168564a8465898e6ab0242eaefb275bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Janji=C4=87?= Date: Fri, 19 Jan 2024 10:26:54 +0100 Subject: [PATCH 19/27] make one test use @cfgrammar --- test/test_csg.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_csg.jl b/test/test_csg.jl index 4e54f14..7ed214a 100644 --- a/test/test_csg.jl +++ b/test/test_csg.jl @@ -1,17 +1,17 @@ @testset verbose=true "CSGs" begin @testset "Creating grammars" begin - g₁ = @csgrammar begin + g₁ = @cfgrammar begin Real = |(1:9) end @test g₁.rules == collect(1:9) @test :Real ∈ g₁.types - g₂ = @csgrammar begin + g₂ = @cfgrammar begin Real = |([1,2,3]) end @test g₂.rules == [1,2,3] - g₃ = @csgrammar begin + g₃ = @cfgrammar begin Real = 1 | 2 | 3 end @test g₃.rules == [1,2,3] From f1e38d5e8400ad44157effaa4352b2b136bd6b91 Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Tue, 6 Feb 2024 13:21:06 +0100 Subject: [PATCH 20/27] Fix pretty-printing expressions with Holes --- src/rulenode_operators.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/rulenode_operators.jl b/src/rulenode_operators.jl index b5ef545..56fb74c 100644 --- a/src/rulenode_operators.jl +++ b/src/rulenode_operators.jl @@ -201,8 +201,8 @@ function _rulenode2expr(expr::Expr, rulenode::RuleNode, grammar::Grammar, j=0) elseif haskey(grammar.bytype, arg) child = rulenode.children[j+=1] if isa(child, Hole) - expr.args[k] = _rulenode2expr(child, grammar) - continue + expr.args[k] = _rulenode2expr(child, grammar) + continue end expr.args[k] = (child._val !== nothing) ? 
child._val : deepcopy(grammar.rules[child.ind]) @@ -219,6 +219,9 @@ function _rulenode2expr(typ::Symbol, rulenode::RuleNode, grammar::Grammar, j=0) retval = typ if haskey(grammar.bytype, typ) child = rulenode.children[1] + if isa(child, Hole) + return retval, j + end retval = (child._val !== nothing) ? child._val : deepcopy(grammar.rules[child.ind]) if !grammar.isterminal[child.ind] From c7bb3e77e0e7e04343d816405902ed5c88b0f190 Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Wed, 7 Feb 2024 09:19:33 +0100 Subject: [PATCH 21/27] Update introduction --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6d29e91..637c3cd 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ -# Grammar.jl [![Build Status](https://github.com/Herb-AI/HerbGrammar.jl/actions/workflows/CI.yml/badge.svg?branch=master)](https://github.com/Herb-AI/HerbGrammar.jl/actions/workflows/CI.yml?query=branch%3Amaster) +[![Dev-Docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://Herb-AI.github.io/Herb.jl/dev) + +# HerbGrammar.jl This package contains functionality for declaring grammars for the Herb Program Synthesis framework. +For full documentation please see the [`Herb.jl` documentation](https://herb-ai.github.io/Herb.jl/dev/). + ## Getting started -To use this project, initialize the project with +For a quick tutorial on how to get started with using `HerbSearch.jl` have a look at our [introductory tutorial](https://herb-ai.github.io/Herb.jl/dev/get_started/). +If you want to help developing this project, initialize the project with ```shell julia --project=. 
-e 'using Pkg; Pkg.instantiate()' ``` From 10d5a2a6c506709d4e1177367168a4911b852ace Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 22 Feb 2024 16:28:25 +0100 Subject: [PATCH 22/27] Add test to show that empty grammars cannot be created --- test/test_csg.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/test_csg.jl b/test/test_csg.jl index 7ed214a..b059adc 100644 --- a/test/test_csg.jl +++ b/test/test_csg.jl @@ -1,4 +1,16 @@ @testset verbose=true "CSGs" begin + @testset "Create empty grammar" begin + g = @csgrammar begin end + @test isempty(g.rules) + @test isempty(g.types) + @test isempty(g.isterminal) + @test isempty(g.iseval) + @test isempty(g.bytype) + @test isempty(g.domains) + @test isempty(g.childtypes) + @test isempty(g.log_probabilities) + end + @testset "Creating grammars" begin g₁ = @cfgrammar begin Real = |(1:9) From 360bc1912762513293f1e71600f877f75bb293c9 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 22 Feb 2024 18:05:24 +0100 Subject: [PATCH 23/27] Fix added test for empty grammars --- test/test_csg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_csg.jl b/test/test_csg.jl index b059adc..30864e2 100644 --- a/test/test_csg.jl +++ b/test/test_csg.jl @@ -8,7 +8,7 @@ @test isempty(g.bytype) @test isempty(g.domains) @test isempty(g.childtypes) - @test isempty(g.log_probabilities) + @test isnothing(g.log_probabilities) end @testset "Creating grammars" begin From 014731ef3adcd01713c1272fa1c90e92d53fe490 Mon Sep 17 00:00:00 2001 From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com> Date: Thu, 22 Feb 2024 18:09:23 +0100 Subject: [PATCH 24/27] Add type annotations in `expr2csgrammar` Without them, an empty grammar results in many `Vector{Any}`s, closes #53 --- src/csg/csg.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/csg/csg.jl b/src/csg/csg.jl index 
74c23e7..07f808d 100644 --- a/src/csg/csg.jl +++ b/src/csg/csg.jl @@ -84,9 +84,9 @@ function expr2csgrammar(ex::Expr)::ContextSensitiveGrammar end end alltypes = collect(keys(bytype)) - is_terminal = [isterminal(rule, alltypes) for rule ∈ rules] - is_eval = [iseval(rule) for rule ∈ rules] - childtypes = [get_childtypes(rule, alltypes) for rule ∈ rules] + is_terminal::Vector{Bool} = [isterminal(rule, alltypes) for rule ∈ rules] + is_eval::Vector{Bool} = [iseval(rule) for rule ∈ rules] + childtypes::Vector{Vector{Symbol}} = [get_childtypes(rule, alltypes) for rule ∈ rules] domains = Dict(type => BitArray(r ∈ bytype[type] for r ∈ 1:length(rules)) for type ∈ alltypes) return ContextSensitiveGrammar(rules, types, is_terminal, is_eval, bytype, domains, childtypes, nothing) end From 24c5c8cc98b44fe913570c9d99d48444ea6d95b3 Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Mon, 26 Feb 2024 10:48:12 +0100 Subject: [PATCH 25/27] Update HerbCore dependency --- Project.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index a9c50e8..dd2840a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HerbGrammar" uuid = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7" authors = ["Sebastijan Dumancic ", "Jaap de Jong ", "Nicolae Filat ", "Piotr Cichoń "] -version = "0.1.0" +version = "0.2.0" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" @@ -11,11 +11,11 @@ TreeView = "39424ebd-4cf3-5550-a685-96706a953f40" HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45" [compat] -AbstractTrees = "0.4" -DataStructures = "0.17,0.18" -TreeView = "0.5" -HerbCore = "0.1.0" -julia = "1.8" +AbstractTrees = "^0.4" +DataStructures = "^0.17,0.18" +TreeView = "^0.5" +HerbCore = "^0.2.0" +julia = "^1.8" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 7ccaa2ee96cb03e86af223246fe10ac41d014275 Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Mon, 26 Feb 2024 10:50:28 +0100 Subject: [PATCH 26/27] Update HerbCore 
dependency --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index dd2840a..7884d33 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45" [compat] AbstractTrees = "^0.4" -DataStructures = "^0.17,0.18" +DataStructures = "^0.18" TreeView = "^0.5" HerbCore = "^0.2.0" julia = "^1.8" From e4664e998cd8bc3254df3c9a7c304284ac76d31c Mon Sep 17 00:00:00 2001 From: Tilman Hinnerichs Date: Mon, 26 Feb 2024 10:56:33 +0100 Subject: [PATCH 27/27] Grammar -> AbstractGrammar according to HerbCore 0.2 --- src/csg/csg.jl | 10 ++--- src/grammar_base.jl | 78 +++++++++++++++++++-------------------- src/rulenode_operators.jl | 76 +++++++++++++++++++------------------- src/utils.jl | 20 +++++----- 4 files changed, 92 insertions(+), 92 deletions(-) diff --git a/src/csg/csg.jl b/src/csg/csg.jl index 07f808d..375414b 100644 --- a/src/csg/csg.jl +++ b/src/csg/csg.jl @@ -1,8 +1,8 @@ """ - ContextSensitiveGrammar <: Grammar + ContextSensitiveGrammar <: AbstractGrammar Represents a context-sensitive grammar. -Extends [`Grammar`](@ref) with constraints. +Extends [`AbstractGrammar`](@ref) with constraints. Consists of: @@ -22,7 +22,7 @@ Consists of: Use the [`@csgrammar`](@ref) macro to create a [`ContextSensitiveGrammar`](@ref) object. Use the [`@pcsgrammar`](@ref) macro to create a [`ContextSensitiveGrammar`](@ref) object with probabilities. """ -mutable struct ContextSensitiveGrammar <: Grammar +mutable struct ContextSensitiveGrammar <: AbstractGrammar rules::Vector{Any} types::Vector{Union{Symbol, Nothing}} isterminal::BitVector @@ -190,11 +190,11 @@ function Base.display(rulenode::RuleNode, grammar::ContextSensitiveGrammar) end """ - merge_grammars!(merge_to::Grammar, merge_from::Grammar) + merge_grammars!(merge_to::AbstractGrammar, merge_from::AbstractGrammar) Adds all rules and constraints from `merge_from` to `merge_to`. 
""" -function merge_grammars!(merge_to::Grammar, merge_from::Grammar) +function merge_grammars!(merge_to::AbstractGrammar, merge_from::AbstractGrammar) for i in eachindex(merge_from.rules) expression = :($(merge_from.types[i]) = $(merge_from.rules[i])) add_rule!(merge_to, expression) diff --git a/src/grammar_base.jl b/src/grammar_base.jl index 9a17a10..cbe53e2 100644 --- a/src/grammar_base.jl +++ b/src/grammar_base.jl @@ -45,63 +45,63 @@ function get_childtypes(rule::Any, types::AbstractVector{Symbol}) return retval end -Base.getindex(grammar::Grammar, typ::Symbol) = grammar.bytype[typ] +Base.getindex(grammar::AbstractGrammar, typ::Symbol) = grammar.bytype[typ] """ - nonterminals(grammar::Grammar)::Vector{Symbol} + nonterminals(grammar::AbstractGrammar)::Vector{Symbol} -Returns a list of the nonterminals or types in the [`Grammar`](@ref). +Returns a list of the nonterminals or types in the [`AbstractGrammar`](@ref). """ -nonterminals(grammar::Grammar)::Vector{Symbol} = collect(keys(grammar.bytype)) +nonterminals(grammar::AbstractGrammar)::Vector{Symbol} = collect(keys(grammar.bytype)) """ - return_type(grammar::Grammar, rule_index::Int)::Symbol + return_type(grammar::AbstractGrammar, rule_index::Int)::Symbol Returns the type of the production rule at `rule_index`. """ -return_type(grammar::Grammar, rule_index::Int) = grammar.types[rule_index] +return_type(grammar::AbstractGrammar, rule_index::Int) = grammar.types[rule_index] """ - child_types(grammar::Grammar, rule_index::Int) + child_types(grammar::AbstractGrammar, rule_index::Int) Returns the types of the children (nonterminals) of the production rule at `rule_index`. 
""" -child_types(grammar::Grammar, rule_index::Int) = grammar.childtypes[rule_index] +child_types(grammar::AbstractGrammar, rule_index::Int) = grammar.childtypes[rule_index] """ - get_domain(g::Grammar, type::Symbol)::BitVector + get_domain(g::AbstractGrammar, type::Symbol)::BitVector Returns the domain for the hole of a certain type as a `BitVector` of the same length as the number of rules in the grammar. Bit `i` is set to `true` iff rule `i` is of type `type`. !!! info Since this function can be intensively used when exploring a program space defined by a grammar, - the outcomes of this function are precomputed and stored in the `domains` field in a [`Grammar`](@ref). + the outcomes of this function are precomputed and stored in the `domains` field in a [`AbstractGrammar`](@ref). """ -get_domain(g::Grammar, type::Symbol)::BitVector = deepcopy(g.domains[type]) +get_domain(g::AbstractGrammar, type::Symbol)::BitVector = deepcopy(g.domains[type]) """ - get_domain(g::Grammar, rules::Vector{Int})::BitVector + get_domain(g::AbstractGrammar, rules::Vector{Int})::BitVector Takes a domain `rules` defined as a vector of ints and converts it to a domain defined as a `BitVector`. """ -get_domain(g::Grammar, rules::Vector{Int})::BitVector = BitArray(r ∈ rules for r ∈ 1:length(g.rules)) +get_domain(g::AbstractGrammar, rules::Vector{Int})::BitVector = BitArray(r ∈ rules for r ∈ 1:length(g.rules)) """ - isterminal(grammar::Grammar, rule_index::Int)::Bool + isterminal(grammar::AbstractGrammar, rule_index::Int)::Bool Returns true if the production rule at `rule_index` is terminal, i.e., does not contain any nonterminal symbols. """ -isterminal(grammar::Grammar, rule_index::Int)::Bool = grammar.isterminal[rule_index] +isterminal(grammar::AbstractGrammar, rule_index::Int)::Bool = grammar.isterminal[rule_index] """ - iseval(grammar::Grammar)::Bool + iseval(grammar::AbstractGrammar)::Bool Returns true if any production rules in grammar contain the special _() eval function. 
@@ -109,11 +109,11 @@ Returns true if any production rules in grammar contain the special _() eval fun evaluate immediately functionality is not yet supported by most of Herb.jl """ -iseval(grammar::Grammar)::Bool = any(grammar.iseval) +iseval(grammar::AbstractGrammar)::Bool = any(grammar.iseval) """ - iseval(grammar::Grammar, index::Int)::Bool + iseval(grammar::AbstractGrammar, index::Int)::Bool Returns true if the production rule at rule_index contains the special _() eval function. @@ -121,18 +121,18 @@ Returns true if the production rule at rule_index contains the special _() eval evaluate immediately functionality is not yet supported by most of Herb.jl """ -iseval(grammar::Grammar, index::Int)::Bool = grammar.iseval[index] +iseval(grammar::AbstractGrammar, index::Int)::Bool = grammar.iseval[index] """ - log_probability(grammar::Grammar, index::Int)::Real + log_probability(grammar::AbstractGrammar, index::Int)::Real Returns the log probability for the rule at `index` in the grammar. !!! warning If the grammar is not probabilistic, a warning is displayed, and a uniform probability is assumed. """ -function log_probability(grammar::Grammar, index::Int)::Real +function log_probability(grammar::AbstractGrammar, index::Int)::Real if !isprobabilistic(grammar) @warn "Requesting probability in a non-probabilistic grammar.\nUniform distribution is assumed." # Assume uniform probability @@ -142,7 +142,7 @@ function log_probability(grammar::Grammar, index::Int)::Real end """ - probability(grammar::Grammar, index::Int)::Real + probability(grammar::AbstractGrammar, index::Int)::Real Return the probability for a rule in the grammar. Use [`log_probability`](@ref) whenever possible. @@ -150,7 +150,7 @@ Use [`log_probability`](@ref) whenever possible. !!! warning If the grammar is not probabilistic, a warning is displayed, and a uniform probability is assumed. 
""" -function probability(grammar::Grammar, index::Int)::Real +function probability(grammar::AbstractGrammar, index::Int)::Real if !isprobabilistic(grammar) @warn "Requesting probability in a non-probabilistic grammar.\nUniform distribution is assumed." # Assume uniform probability @@ -160,30 +160,30 @@ function probability(grammar::Grammar, index::Int)::Real end """ - isprobabilistic(grammar::Grammar)::Bool + isprobabilistic(grammar::AbstractGrammar)::Bool -Function returns whether a [`Grammar`](@ref) is probabilistic. +Function returns whether a [`AbstractGrammar`](@ref) is probabilistic. """ -isprobabilistic(grammar::Grammar)::Bool = !(grammar.log_probabilities ≡ nothing) +isprobabilistic(grammar::AbstractGrammar)::Bool = !(grammar.log_probabilities ≡ nothing) """ - nchildren(grammar::Grammar, rule_index::Int)::Int + nchildren(grammar::AbstractGrammar, rule_index::Int)::Int Returns the number of children (nonterminals) of the production rule at `rule_index`. """ -nchildren(grammar::Grammar, rule_index::Int)::Int = length(grammar.childtypes[rule_index]) +nchildren(grammar::AbstractGrammar, rule_index::Int)::Int = length(grammar.childtypes[rule_index]) """ - max_arity(grammar::Grammar)::Int + max_arity(grammar::AbstractGrammar)::Int -Returns the maximum arity (number of children) over all production rules in the [`Grammar`](@ref). +Returns the maximum arity (number of children) over all production rules in the [`AbstractGrammar`](@ref). """ -max_arity(grammar::Grammar)::Int = maximum(length(cs) for cs in grammar.childtypes) +max_arity(grammar::AbstractGrammar)::Int = maximum(length(cs) for cs in grammar.childtypes) -function Base.show(io::IO, grammar::Grammar) +function Base.show(io::IO, grammar::AbstractGrammar) for i in eachindex(grammar.rules) println(io, i, ": ", grammar.types[i], " = ", grammar.rules[i]) end @@ -191,7 +191,7 @@ end """ - add_rule!(g::Grammar, e::Expr) + add_rule!(g::AbstractGrammar, e::Expr) Adds a rule to the grammar. 
@@ -204,7 +204,7 @@ The syntax is identical to the syntax of [`@csgrammar`](@ref) and [`@cfgrammar`] !!! warning Calls to this function are ignored if a rule is already in the grammar. """ -function add_rule!(g::Grammar, e::Expr) +function add_rule!(g::AbstractGrammar, e::Expr) if e.head == :(=) && typeof(e.args[1]) == Symbol s = e.args[1] # Name of return type rule = e.args[2] # expression? @@ -237,7 +237,7 @@ end """ Adds a probabilistic derivation rule. """ -function add_rule!(g::Grammar, p::Real, e::Expr) +function add_rule!(g::AbstractGrammar, p::Real, e::Expr) isprobabilistic(g) || throw(ArgumentError("adding a probabilistic rule to a non-probabilistic grammar")) len₀ = length(g.rules) add_rule!(g, e) @@ -248,13 +248,13 @@ function add_rule!(g::Grammar, p::Real, e::Expr) end """ - remove_rule!(g::Grammar, idx::Int) + remove_rule!(g::AbstractGrammar, idx::Int) Removes the rule corresponding to `idx` from the grammar. In order to avoid shifting indices, the rule is replaced with `nothing`, and all other data structures are updated accordingly. """ -function remove_rule!(g::Grammar, idx::Int) +function remove_rule!(g::AbstractGrammar, idx::Int) type = g.types[idx] g.rules[idx] = nothing g.iseval[idx] = false @@ -275,7 +275,7 @@ end """ - cleanup_removed_rules!(g::Grammar) + cleanup_removed_rules!(g::AbstractGrammar) Removes any placeholders for previously deleted rules. This means that indices get shifted. @@ -285,7 +285,7 @@ This means that indices get shifted. [`AbstractRuleNode`](@ref) trees created before the call to this function. These trees become meaningless. 
""" -function cleanup_removed_rules!(g::Grammar) +function cleanup_removed_rules!(g::AbstractGrammar) rules_to_cleanup = findall(isequal(nothing), g.rules) # highest indices are removed first, otherwise their index will have shifted for v ∈ [g.rules, g.types, g.isterminal, g.iseval, g.childtypes] diff --git a/src/rulenode_operators.jl b/src/rulenode_operators.jl index 56fb74c..1401df8 100644 --- a/src/rulenode_operators.jl +++ b/src/rulenode_operators.jl @@ -1,15 +1,15 @@ -HerbCore.RuleNode(ind::Int, grammar::Grammar) = RuleNode(ind, nothing, [Hole(get_domain(grammar, type)) for type ∈ grammar.childtypes[ind]]) -HerbCore.RuleNode(ind::Int, _val::Any, grammar::Grammar) = RuleNode(ind, _val, [Hole(get_domain(grammar, type)) for type ∈ grammar.childtypes[ind]]) +HerbCore.RuleNode(ind::Int, grammar::AbstractGrammar) = RuleNode(ind, nothing, [Hole(get_domain(grammar, type)) for type ∈ grammar.childtypes[ind]]) +HerbCore.RuleNode(ind::Int, _val::Any, grammar::AbstractGrammar) = RuleNode(ind, _val, [Hole(get_domain(grammar, type)) for type ∈ grammar.childtypes[ind]]) rulesoftype(::Hole, ::Set{Int}) = Set{Int}() """ - rulesoftype(node::RuleNode, grammar::Grammar, ruletype::Symbol) + rulesoftype(node::RuleNode, grammar::AbstractGrammar, ruletype::Symbol) Returns every rule of nonterminal symbol `ruletype` that is also used in the [`AbstractRuleNode`](@ref) tree. 
""" -rulesoftype(node::RuleNode, grammar::Grammar, ruletype::Symbol) = rulesoftype(node, Set{Int}(grammar[ruletype])) -rulesoftype(::Hole, ::Grammar, ::Symbol) = Set{Int}() +rulesoftype(node::RuleNode, grammar::AbstractGrammar, ruletype::Symbol) = rulesoftype(node, Set{Int}(grammar[ruletype])) +rulesoftype(::Hole, ::AbstractGrammar, ::Symbol) = Set{Int}() """ @@ -46,15 +46,15 @@ rulesoftype(::Hole, ::Set{Int}, ::RuleNode) = Set() rulesoftype(::Hole, ::Set{Int}, ::Hole) = Set() """ - rulesoftype(node::RuleNode, grammar::Grammar, ruletype::Symbol, ignoreNode::RuleNode) + rulesoftype(node::RuleNode, grammar::AbstractGrammar, ruletype::Symbol, ignoreNode::RuleNode) Returns every rule of nonterminal symbol `ruletype` that is also used in the [`AbstractRuleNode`](@ref) tree, but not in the `ignoreNode` subtree. !!! warning The `ignoreNode` must be a subtree of `node` for it to have an effect. """ -rulesoftype(node::RuleNode, grammar::Grammar, ruletype::Symbol, ignoreNode::RuleNode) = rulesoftype(node, Set(grammar[ruletype]), ignoreNode) -rulesoftype(::Hole, ::Grammar, ::Symbol, ::RuleNode) = Set() +rulesoftype(node::RuleNode, grammar::AbstractGrammar, ruletype::Symbol, ignoreNode::RuleNode) = rulesoftype(node, Set(grammar[ruletype]), ignoreNode) +rulesoftype(::Hole, ::AbstractGrammar, ::Symbol, ::RuleNode) = Set() """ swap_node(expr::AbstractRuleNode, new_expr::AbstractRuleNode, path::Vector{Int}) @@ -172,12 +172,12 @@ end """ - rulenode2expr(rulenode::RuleNode, grammar::Grammar) + rulenode2expr(rulenode::RuleNode, grammar::AbstractGrammar) Converts a [`RuleNode`](@ref) into a Julia expression corresponding to the rule definitions in the grammar. The returned expression can be evaluated with Julia semantics using `eval()`. """ -function rulenode2expr(rulenode::RuleNode, grammar::Grammar) +function rulenode2expr(rulenode::RuleNode, grammar::AbstractGrammar) root = (rulenode._val !== nothing) ? 
rulenode._val : deepcopy(grammar.rules[rulenode.ind]) if !grammar.isterminal[rulenode.ind] # not terminal @@ -187,14 +187,14 @@ function rulenode2expr(rulenode::RuleNode, grammar::Grammar) end -function _rulenode2expr(rulenode::Hole, grammar::Grammar) +function _rulenode2expr(rulenode::Hole, grammar::AbstractGrammar) # Find the index of the first element that is true index = findfirst(==(true), rulenode.domain) return isnothing(index) ? :Nothing : grammar.types[index] end -rulenode2expr(rulenode::Hole, grammar::Grammar) = _rulenode2expr(rulenode::Hole, grammar::Grammar) +rulenode2expr(rulenode::Hole, grammar::AbstractGrammar) = _rulenode2expr(rulenode::Hole, grammar::AbstractGrammar) -function _rulenode2expr(expr::Expr, rulenode::RuleNode, grammar::Grammar, j=0) +function _rulenode2expr(expr::Expr, rulenode::RuleNode, grammar::AbstractGrammar, j=0) for (k,arg) in enumerate(expr.args) if isa(arg, Expr) expr.args[k],j = _rulenode2expr(arg, rulenode, grammar, j) @@ -215,7 +215,7 @@ function _rulenode2expr(expr::Expr, rulenode::RuleNode, grammar::Grammar, j=0) end -function _rulenode2expr(typ::Symbol, rulenode::RuleNode, grammar::Grammar, j=0) +function _rulenode2expr(typ::Symbol, rulenode::RuleNode, grammar::AbstractGrammar, j=0) retval = typ if haskey(grammar.bytype, typ) child = rulenode.children[1] @@ -235,20 +235,20 @@ end """ Calculates the log probability associated with a rulenode in a probabilistic grammar. 
""" -function rulenode_log_probability(node::RuleNode, grammar::Grammar) +function rulenode_log_probability(node::RuleNode, grammar::AbstractGrammar) log_probability(grammar, node.ind) + sum((rulenode_log_probability(c, grammar) for c ∈ node.children), init=1) end -rulenode_log_probability(::Hole, ::Grammar) = 1 +rulenode_log_probability(::Hole, ::AbstractGrammar) = 1 """ - iscomplete(grammar::Grammar, node::RuleNode) + iscomplete(grammar::AbstractGrammar, node::RuleNode) Returns true if the expression represented by the [`RuleNode`](@ref) is a complete expression, meaning that it is fully defined and doesn't have any [`Hole`](@ref)s. """ -function iscomplete(grammar::Grammar, node::RuleNode) +function iscomplete(grammar::AbstractGrammar, node::RuleNode) if isterminal(grammar, node) return true elseif isempty(node.children) @@ -259,93 +259,93 @@ function iscomplete(grammar::Grammar, node::RuleNode) end end -iscomplete(grammar::Grammar, ::Hole) = false +iscomplete(grammar::AbstractGrammar, ::Hole) = false """ - return_type(grammar::Grammar, node::RuleNode) + return_type(grammar::AbstractGrammar, node::RuleNode) Gives the return type or nonterminal symbol in the production rule used by `node`. """ -return_type(grammar::Grammar, node::RuleNode)::Symbol = grammar.types[node.ind] +return_type(grammar::AbstractGrammar, node::RuleNode)::Symbol = grammar.types[node.ind] """ - child_types(grammar::Grammar, node::RuleNode) + child_types(grammar::AbstractGrammar, node::RuleNode) Returns the list of child types (nonterminal symbols) in the production rule used by `node`. 
""" -child_types(grammar::Grammar, node::RuleNode)::Vector{Symbol} = grammar.childtypes[node.ind] +child_types(grammar::AbstractGrammar, node::RuleNode)::Vector{Symbol} = grammar.childtypes[node.ind] """ - isterminal(grammar::Grammar, node::RuleNode)::Bool + isterminal(grammar::AbstractGrammar, node::RuleNode)::Bool Returns true if the production rule used by `node` is terminal, i.e., does not contain any nonterminal symbols. """ -isterminal(grammar::Grammar, node::RuleNode)::Bool = grammar.isterminal[node.ind] +isterminal(grammar::AbstractGrammar, node::RuleNode)::Bool = grammar.isterminal[node.ind] """ - nchildren(grammar::Grammar, node::RuleNode)::Int + nchildren(grammar::AbstractGrammar, node::RuleNode)::Int Returns the number of children in the production rule used by `node`. """ -nchildren(grammar::Grammar, node::RuleNode)::Int = length(child_types(grammar, node)) +nchildren(grammar::AbstractGrammar, node::RuleNode)::Int = length(child_types(grammar, node)) """ - isvariable(grammar::Grammar, node::RuleNode)::Bool + isvariable(grammar::AbstractGrammar, node::RuleNode)::Bool Return true if the rule used by `node` represents a variable in a program (essentially, an input to the program) """ -isvariable(grammar::Grammar, node::RuleNode)::Bool = ( +isvariable(grammar::AbstractGrammar, node::RuleNode)::Bool = ( grammar.isterminal[node.ind] && grammar.rules[node.ind] isa Symbol && !_is_defined_in_modules(grammar.rules[node.ind], [Main, Base]) ) """ - isvariable(grammar::Grammar, node::RuleNode, mod::Module)::Bool + isvariable(grammar::AbstractGrammar, node::RuleNode, mod::Module)::Bool Return true if the rule used by `node` represents a variable. Taking into account the symbols defined in the given module(s). 
""" -isvariable(grammar::Grammar, node::RuleNode, mod::Module...)::Bool = ( +isvariable(grammar::AbstractGrammar, node::RuleNode, mod::Module...)::Bool = ( grammar.isterminal[node.ind] && grammar.rules[node.ind] isa Symbol && !_is_defined_in_modules(grammar.rules[node.ind], [mod..., Main, Base]) ) """ - isvariable(grammar::Grammar, ind::Int)::Bool + isvariable(grammar::AbstractGrammar, ind::Int)::Bool Return true if the rule with index `ind` represents a variable. """ -isvariable(grammar::Grammar, ind::Int)::Bool = ( +isvariable(grammar::AbstractGrammar, ind::Int)::Bool = ( grammar.isterminal[ind] && grammar.rules[ind] isa Symbol && !_is_defined_in_modules(grammar.rules[ind], [Main, Base]) ) """ - isvariable(grammar::Grammar, ind::Int, mod::Module)::Bool + isvariable(grammar::AbstractGrammar, ind::Int, mod::Module)::Bool Return true if the rule with index `ind` represents a variable. Taking into account the symbols defined in the given module(s). """ -isvariable(grammar::Grammar, ind::Int, mod::Module...)::Bool = ( +isvariable(grammar::AbstractGrammar, ind::Int, mod::Module...)::Bool = ( grammar.isterminal[ind] && grammar.rules[ind] isa Symbol && !_is_defined_in_modules(grammar.rules[ind], [mod..., Main, Base]) ) """ - contains_returntype(node::RuleNode, grammar::Grammar, sym::Symbol, maxdepth::Int=typemax(Int)) + contains_returntype(node::RuleNode, grammar::AbstractGrammar, sym::Symbol, maxdepth::Int=typemax(Int)) Returns true if the tree rooted at `node` contains at least one node at depth less than `maxdepth` with the given return type or nonterminal symbol. 
""" -function contains_returntype(node::RuleNode, grammar::Grammar, sym::Symbol, maxdepth::Int=typemax(Int)) +function contains_returntype(node::RuleNode, grammar::AbstractGrammar, sym::Symbol, maxdepth::Int=typemax(Int)) maxdepth < 1 && return false if return_type(grammar, node) == sym return true @@ -358,7 +358,7 @@ function contains_returntype(node::RuleNode, grammar::Grammar, sym::Symbol, maxd return false end -function Base.display(rulenode::RuleNode, grammar::Grammar) +function Base.display(rulenode::RuleNode, grammar::AbstractGrammar) root = rulenode2expr(rulenode, grammar) if isa(root, Expr) walk_tree(root) diff --git a/src/utils.jl b/src/utils.jl index e1f7ca6..b35d062 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -4,13 +4,13 @@ AbstractTrees.printnode(io::IO, node::RuleNode) = print(io, node.ind) """ - mindepth_map(grammar::Grammar) + mindepth_map(grammar::AbstractGrammar) -Returns the minimum depth achievable for each production rule in the [`Grammar`](@ref). +Returns the minimum depth achievable for each production rule in the [`AbstractGrammar`](@ref). In other words, this function finds the depths of the lowest trees that can be made using each of the available production rules as a root. """ -function mindepth_map(grammar::Grammar) +function mindepth_map(grammar::AbstractGrammar) dmap0 = Int[isterminal(grammar,i) ? 
1 : typemax(Int)/2 for i in eachindex(grammar.rules)] dmap1 = fill(-1, length(grammar.rules)) while dmap0 != dmap1 @@ -23,37 +23,37 @@ function mindepth_map(grammar::Grammar) end -function _mindepth(grammar::Grammar, rule_index::Int, dmap::AbstractVector{Int}) +function _mindepth(grammar::AbstractGrammar, rule_index::Int, dmap::AbstractVector{Int}) isterminal(grammar, rule_index) && return 1 return 1 + maximum([mindepth(grammar, ctyp, dmap) for ctyp in child_types(grammar, rule_index)]) end """ - mindepth(grammar::Grammar, typ::Symbol, dmap::AbstractVector{Int}) + mindepth(grammar::AbstractGrammar, typ::Symbol, dmap::AbstractVector{Int}) Returns the minimum depth achievable for a given nonterminal symbol. The minimum depth is the depth of the lowest tree that can be made using `typ` as a start symbol. `dmap` can be obtained from [`mindepth_map`](@ref). """ -function mindepth(grammar::Grammar, typ::Symbol, dmap::AbstractVector{Int}) +function mindepth(grammar::AbstractGrammar, typ::Symbol, dmap::AbstractVector{Int}) return minimum(dmap[grammar.bytype[typ]]) end """ SymbolTable -Data structure for mapping terminal symbols in the [`Grammar`](@ref) to their Julia interpretation. +Data structure for mapping terminal symbols in the [`AbstractGrammar`](@ref) to their Julia interpretation. """ const SymbolTable = Dict{Symbol,Any} """ - SymbolTable(grammar::Grammar, mod::Module=Main) + SymbolTable(grammar::AbstractGrammar, mod::Module=Main) Returns a [`SymbolTable`](@ref) populated with a mapping from symbols in the -[`Grammar`](@ref) to symbols in module `mod` or `Main`, if defined. +[`AbstractGrammar`](@ref) to symbols in module `mod` or `Main`, if defined. """ -function HerbGrammar.SymbolTable(grammar::Grammar, mod::Module=Main) +function HerbGrammar.SymbolTable(grammar::AbstractGrammar, mod::Module=Main) tab = SymbolTable() for rule in grammar.rules _add_to_symboltable!(tab, rule, mod)