Skip to content

Commit

Permalink
Merge pull request #114 from Kolaru/new_parser
Browse files Browse the repository at this point in the history
New parser - simplified with a tokenizer pass
  • Loading branch information
Kolaru authored Dec 30, 2023
2 parents cad0b7f + 06b8643 commit c8cb679
Show file tree
Hide file tree
Showing 11 changed files with 272 additions and 363 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MathTeXEngine"
uuid = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53"
authors = ["Benoît Richard <[email protected]>"]
version = "0.5.7"
version = "0.6.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ The table below contains the list of all supported LaTeX construction and their
| Function | `\sin` | `:function` | `name` |
| Generic symbol | `ω` | `:symbol` | `unicode_char` |
| Group | `{ }` | `:group` | `elements...` |
| Inline math | `$ $` | `:inline_math` | `content` |
| Integral | `\int_a^b` | `:integral` | `symbol, low_bound, high_bound` |
| Math fonts | `\mathrm{}` | `:font` | `font_modifier, expr` |
| Punctuation | `!` | `:punctuation` |
Expand Down
3 changes: 2 additions & 1 deletion src/MathTeXEngine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ import FreeTypeAbstraction:
height_insensitive_boundingbox, leftinkbound, rightinkbound,
topinkbound, bottominkbound

export TeXExpr, texparse
export TeXExpr, texparse, TeXParseError, manual_texexpr
export TeXElement, TeXChar, VLine, HLine, generate_tex_elements
export texfont
export glyph_index

# Reexport from LaTeXStrings
export @L_str

include("parser/tokenizer.jl")
include("parser/texexpr.jl")
include("parser/commands_data.jl")
include("parser/commands_registration.jl")
Expand Down
20 changes: 17 additions & 3 deletions src/engine/layout.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ function tex_layout(expr, state)
try
if isleaf(expr)
char = args[1]
if char == ' ' && state.tex_mode == :inline_math
return Space(0.0)
end
texchar = TeXChar(char, state, head)
return texchar
elseif head == :combining_accent
Expand Down Expand Up @@ -92,7 +95,7 @@ function tex_layout(expr, state)
],
scales
)
elseif head == :font || head == :text
elseif head == :font
modifier, content = args
return tex_layout(content, add_font_modifier(state, modifier))
elseif head == :frac
Expand Down Expand Up @@ -124,8 +127,12 @@ function tex_layout(expr, state)
name = args[1]
elements = TeXChar.(collect(name), state, Ref(:function))
return horizontal_layout(elements)
elseif head == :group || head == :expr
elements = tex_layout.(args, state)
elseif head == :group || head == :expr || head == :inline_math
mode = head == :inline_math ? :inline_math : state.tex_mode
elements = tex_layout.(args, change_mode(state, mode))
if isempty(elements)
return Space(0.0)
end
return horizontal_layout(elements)
elseif head == :integral
pad = 0.1
Expand Down Expand Up @@ -196,6 +203,11 @@ function tex_layout(expr, state)
(rightinkbound(content), 0)
]
)
elseif head == :text
modifier, content = args
new_state = add_font_modifier(state, modifier)
new_state = change_mode(new_state, :text)
return tex_layout(content, new_state)
elseif head == :underover
core, sub, super = tex_layout.(args, state)

Expand Down Expand Up @@ -290,6 +302,7 @@ function generate_tex_elements(str, font_family=FontFamily())
return unravel(layout)
end

#=
# Still hacky as hell
function generate_tex_elements(str::LaTeXString, font_family=FontFamily())
parts = String.(split(str, raw"$"))
Expand All @@ -302,3 +315,4 @@ function generate_tex_elements(str::LaTeXString, font_family=FontFamily())
return unravel(horizontal_layout(groups))
end
=#
12 changes: 9 additions & 3 deletions src/engine/layout_context.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
# State passed along to `tex_layout` while laying out an expression.
# - font_family: the font family in use.
# - font_modifiers: font modifiers accumulated so far (extended by `add_font_modifier`).
# - tex_mode: current mode as a Symbol; the convenience constructors default it to
#   `:text`, and `tex_layout` also uses `:inline_math`.
struct LayoutState
font_family::FontFamily
font_modifiers::Vector{Symbol}
tex_mode::Symbol
end

# Convenience constructors: each one supplies the default for the next missing field.

# No mode given: start in `:text` mode.
function LayoutState(font_family::FontFamily, modifiers::Vector)
    return LayoutState(font_family, modifiers, :text)
end

# No modifiers given: start with an empty list of font modifiers.
function LayoutState(font_family::FontFamily)
    return LayoutState(font_family, Symbol[])
end

# Nothing given: use the default font family.
function LayoutState()
    return LayoutState(FontFamily())
end

Base.broadcastable(state::LayoutState) = [state]
Base.broadcastable(state::LayoutState) = Ref(state)

"""
    change_mode(state::LayoutState, mode)

Return a new `LayoutState` with the font family and font modifiers of `state` and
its `tex_mode` set to `mode`. The modifiers vector is passed along as is (not copied).
"""
function change_mode(state::LayoutState, mode)
    family = state.font_family
    modifiers = state.font_modifiers
    return LayoutState(family, modifiers, mode)
end

function add_font_modifier(state::LayoutState, modifier)
modifiers = [state.font_modifiers..., modifier]
return LayoutState(state.font_family, modifiers)
modifiers = vcat(state.font_modifiers, modifier)
return LayoutState(state.font_family, modifiers, state.tex_mode)
end

function get_font(state::LayoutState, char_type)
Expand Down
144 changes: 77 additions & 67 deletions src/parser/commands_registration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,100 +27,119 @@ function Base.get(d::CanonicalDict, key, default)
return default
end


# Each symbol or command has a unique canonical representation
# Each symbol or command string has a unique canonical representation
const symbol_to_canonical = CanonicalDict{Char}()
const command_to_canonical = CanonicalDict{String}()

"""
    canonical_expr(char::Char)

Return the expression registered for `char` in `symbol_to_canonical`, or a plain
`TeXExpr(:char, char)` when nothing is registered for it.
"""
function canonical_expr(char::Char)
    if haskey(symbol_to_canonical, char)
        return symbol_to_canonical[char]
    else
        return TeXExpr(:char, char)
    end
end

# Look up the expression registered for a command string in `command_to_canonical`;
# returns `nothing` when the command is not registered.
function canonical_expr(command::String)
    return get(command_to_canonical, command, nothing)
end

# Symbols missing from the REPL completion data
# The value is the single character U+2260 (NOT EQUAL TO). It must not be empty:
# the code below takes `first` of the stored string to obtain the symbol.
latex_symbols[raw"\neq"] = "≠"

function get_symbol_char(command)
if !haskey(latex_symbols, command)
@warn "unknown command $command"
function get_symbol_char(com_str)
if !haskey(latex_symbols, com_str)
@warn "unknown com_str $com_str"
return '?'
end

return first(latex_symbols[command])
end

# Numbers
for char in join(0:9)
symbol_to_canonical[char] = TeXExpr(:digit, char)
return first(latex_symbols[com_str])
end

##
## Special commands
## Commands
##

command_to_canonical[raw"\frac"] = TeXExpr(:argument_gatherer, [:frac, 2])
command_to_canonical[raw"\sqrt"] = TeXExpr(:argument_gatherer, [:sqrt, 1])
command_to_canonical[raw"\overline"] = TeXExpr(:argument_gatherer, [:overline, 1])
command_to_canonical[raw"\{"] = TeXExpr(:delimiter, '{')
command_to_canonical[raw"\}"] = TeXExpr(:delimiter, '}')
"""
    command_expr(com_str, args)

Build the expression for the command `com_str`: take a copy of the template
registered in `command_definitions` and return a `TeXExpr` with the same head whose
arguments are the template's arguments followed by `args`.

Indexing `command_definitions` throws a `KeyError` if `com_str` is not registered.
"""
function command_expr(com_str, args)
    definition = command_definitions[com_str]
    template = copy(definition[1])
    full_args = vcat(template.args, args)
    return TeXExpr(template.head, full_args)
end
# Second entry of the `command_definitions` record for `com_str`, i.e. the argument
# count stored next to the template.
function required_args(com_str)
    definition = command_definitions[com_str]
    return definition[2]
end

# Registry of known commands, keyed by the command string (backslash included).
# Each value is a `(template, nargs)` tuple: `command_expr` copies `template` and
# appends the given arguments to its args, and `required_args` returns `nargs`.
# The loops below add further entries to this dictionary.
const command_definitions = Dict(
raw"\frac" => (TeXExpr(:frac), 2),
raw"\sqrt" => (TeXExpr(:sqrt), 1),
raw"\overline" => (TeXExpr(:overline), 1),
raw"\{" => (TeXExpr(:delimiter, '{'), 0),
raw"\}" => (TeXExpr(:delimiter, '}'), 0),
)

# Register every name in `underover_functions` as the command "\<name>": an
# :underover expression whose core is the function and whose two remaining slots
# start out as `nothing`. These commands take no argument.
for name in underover_functions
    core = TeXExpr(:function, name)
    underover = TeXExpr(:underover, Any[core, nothing, nothing])
    command_definitions["\\" * name] = (underover, 0)
end

# Register every name in `generic_functions` as the argument-less command "\<name>"
# producing a :function expression.
for name in generic_functions
    command_definitions["\\" * name] = (TeXExpr(:function, name), 0)
end

# Register each spacing command with the width paired to it in `space_commands`.
for (space_command, space_width) in space_commands
    space_expr = TeXExpr(:space, space_width)
    command_definitions[space_command] = (space_expr, 0)
end

# Register the combining accents: the template already holds the accent symbol, and
# the single required argument is appended later by `command_expr`.
for accent_command in combining_accents
    accent_symbol = TeXExpr(:symbol, get_symbol_char(accent_command))
    command_definitions[accent_command] = (TeXExpr(:combining_accent, accent_symbol), 1)
end

# Register the font commands. For every font name there is a "\math<name>" command
# (a :font expression) and a "\text<name>" command (a :text expression), each taking
# one argument and carrying the font name as a Symbol.
for font_name in font_names
    modifier = Symbol(font_name)
    command_definitions["\\math$font_name"] = (TeXExpr(:font, modifier), 1)
    command_definitions["\\text$font_name"] = (TeXExpr(:text, modifier), 1)
end
# A bare "\text" uses the :rm modifier.
command_definitions["\\text"] = (TeXExpr(:text, :rm), 1)

##
## Commands from the commands_data.jl file
## Symbols
##

# Symbols missing from the REPL completion data
# The value is the single character U+2260 (NOT EQUAL TO). It must not be empty:
# `get_symbol_char` and the default loop take `first` of the stored string.
latex_symbols[raw"\neq"] = "≠"

# Numbers: the ten ASCII digits are registered as :digit expressions
for digit in '0':'9'
    symbol_to_canonical[digit] = TeXExpr(:digit, digit)
end

# Every symbol listed in `spaced_symbols` is wrapped in a :spaced expression.
for spaced_char in spaced_symbols
    symbol_to_canonical[spaced_char] = TeXExpr(:spaced, TeXExpr(:symbol, spaced_char))
end

# Special case for hyphen that must be replaced by a minus sign
# TODO Make sure it is not replaced outside of math mode
# TODO Make sure it is not replaced outside of math mode and when starting a group
# The ASCII hyphen maps to U+2212 (MINUS SIGN), wrapped as a :spaced symbol.
symbol_to_canonical['-'] = TeXExpr(:spaced, TeXExpr(:symbol, '−'))

for command in spaced_commands
symbol = get_symbol_char(command)
for com_str in spaced_commands
symbol = get_symbol_char(com_str)
symbol_expr = TeXExpr(:symbol, symbol)
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:spaced, symbol_expr)
template = TeXExpr(:spaced, symbol_expr)
symbol_to_canonical[symbol] = template
command_definitions[com_str] = (template, 0)
end

for command in underover_commands
symbol = get_symbol_char(command)
for com_str in underover_commands
symbol = get_symbol_char(com_str)
symbol_expr = TeXExpr(:symbol, symbol)
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:underover, Any[symbol_expr, nothing, nothing])
end

for func in underover_functions
command = "\\" * func
command_to_canonical[command] = TeXExpr(:underover, Any[TeXExpr(:function, func), nothing, nothing])
template = TeXExpr(:underover, Any[symbol_expr, nothing, nothing])
symbol_to_canonical[symbol] = template
command_definitions[com_str] = (template, 0)
end

for command in integral_commands
symbol = get_symbol_char(command)
for com_str in integral_commands
symbol = get_symbol_char(com_str)
symbol_expr = TeXExpr(:symbol, symbol)
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:integral, Any[symbol_expr, nothing, nothing])
end

for func in generic_functions
command = "\\" * func
command_to_canonical[command] = TeXExpr(:function, func)
end

for (command, width) in space_commands
command_to_canonical[command] = TeXExpr(:space, width)
template = TeXExpr(:integral, Any[symbol_expr, nothing, nothing])
symbol_to_canonical[symbol] = template
command_definitions[com_str] = (template, 0)
end

# Characters that stand for a space: map each one to a :space expression of the
# width paired with it in `space_symbols`.
for (space_char, space_width) in space_symbols
    space_expr = TeXExpr(:space, space_width)
    symbol_to_canonical[space_char] = space_expr
end

for command in combining_accents
combining_char = get_symbol_char(command)
symbol_expr = TeXExpr(:symbol, combining_char)
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:combining_accent, 2, symbol_expr])
end

for symbol in punctuation_symbols
symbol = first(symbol)
symbol_to_canonical[symbol] = TeXExpr(:punctuation, symbol)
Expand All @@ -131,30 +150,21 @@ for symbol in delimiter_symbols
symbol_to_canonical[symbol] = TeXExpr(:delimiter, symbol)
end

for name in font_names
command = "\\math$name"
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:font, 2, Symbol(name)])
command = "\\text$name"
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:text, 2, Symbol(name)])
end
command = "\\text"
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:text, 2, :rm])

##
## Default behavior
##
# We put it at the end to avoid overwriting existing commands

for (command, symbol) in latex_symbols
for (com_str, symbol) in latex_symbols
symbol = first(symbol) # Convert String to Char
symbol_expr = TeXExpr(:symbol, [symbol])
symbol_expr = TeXExpr(:symbol, symbol)

if !haskey(symbol_to_canonical, symbol)
symbol_to_canonical[symbol] = symbol_expr
end

# Separate case for symbols that have multiple valid commands
if !haskey(command_to_canonical, command)
command_to_canonical[command] = symbol_expr
if !haskey(command_definitions, com_str)
command_definitions[com_str] = (symbol_expr, 0)
end
end
Loading

0 comments on commit c8cb679

Please sign in to comment.