Skip to content

Commit

Permalink
Add support for Amazon Ion (#1017)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevin-sellers committed May 2, 2023
1 parent 1e2c367 commit 35e6f20
Show file tree
Hide file tree
Showing 4 changed files with 417 additions and 0 deletions.
20 changes: 20 additions & 0 deletions lib/rouge/demos/ion
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Sample Ion document.
*
* Exercises bare, quoted and string field names, type annotations (`name::`),
* typed nulls, special float values, a list, an s-expression, a timestamp,
* a long (triple-quoted) string, nested structs and a Base64 blob.
*/
{
key: "value",
'float': 1_234e-5,
"annotation": type:: null.string,
list: 'things':: ['symbol', nan, +inf, -inf, "other"],
sexp: (concat ('symbol' [list] "string" null)),
date: date:: 1970-01-01T00:00Z,
"long string": str:: (
'''
long strings are neat
'''
),
struct: {
nested: { value: int:: 12_345 },
},
blob: {{ SGVsbG8sIFdvcmxkIQ== }}, // Hello, World!
}
175 changes: 175 additions & 0 deletions lib/rouge/lexers/ion.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Lexer for the text encoding of Amazon Ion.
    #
    # The lexer is a set of small "fragment" states (comments, numbers,
    # timestamps, quotes, ...) that are mixed into the container states
    # (:list, :sexp, :struct / :value) and into :root.
    class Ion < RegexLexer
      title 'Ion'
      desc 'Amazon Ion (https://amazon-ion.github.io/ion-docs)'
      tag 'ion'
      filenames '*.ion'
      mimetypes 'application/ion'

      # Backslash escapes recognised inside short quoted text.
      # NOTE(review): `R` looks like it may be a typo for `r`; it is harmless
      # here because the catch-all classes below also accept a backslash.
      escapes = %r{\\(?:[\\abtR"'/?\\]|x\h{2}|u\h{4}|U\h{8})}

      # Single-quoted symbol. The catch-all class must exclude the single
      # quote (not the double quote), otherwise the match can run past the
      # closing quote and swallow later quoted symbols on the same line.
      # `*` (rather than `+`) lets the empty symbol '' match.
      q = %r{'(?:#{escapes}|\\'|[^'\n\r])*'}

      # Double-quoted string; `*` lets the empty string "" match.
      qq = %r{"(?:#{escapes}|\\"|[^"\n\r])*"}

      # Delimiter of a long (triple-quoted) string.
      qqq = %r{'''}

      # Unquoted identifier symbol.
      symbol = %r{[A-Za-z_\$](?:[0-9A-Za-z_\$])*}

      # Short double-quoted strings.
      state :qq do
        rule qq, Literal::String::Double
      end

      # Body of a long string; entered after the opening ''' and left on the
      # closing '''. A lone quote inside the body is ordinary string content.
      state :qqq do
        rule %r{'''}, Literal::String::Double, :pop!
        rule %r{[^']+}m, Literal::String::Double
        rule %r{'}, Literal::String::Double
      end

      # All quoted/textual values. The long-string rule comes first so that
      # ''' is never read as a quoted symbol.
      state :quotes do
        rule qqq, Literal::String::Double, :qqq
        mixin :qq
        rule symbol, Literal::String::Symbol
        rule q, Literal::String::Symbol
      end

      # Type annotations: a quoted or bare symbol followed by `::`.
      state :annotation do
        annotation = %r{(?:[\u{0020}-\u{0026}]|[\u{0028}-\u{005B}]|[\u{005D}-\u{FFFF}]|[\t\b\f ])+}
        rule %r{('#{annotation}'|#{symbol})(\s*)(::)} do
          groups Name::Decorator, Text::Whitespace, Operator
        end
      end

      state :comments do
        rule %r{/\*.*?\*/}m, Comment::Multiline
        rule %r{//.*?$}, Comment::Single
      end

      # Booleans and (optionally typed) nulls. The trailing \b keeps longer
      # symbols such as `truee` or `nullish` from matching.
      state :constants do
        rule %r{(?:true|false)\b}, Name::Builtin
        rule %r{null(?:\.(?:blob|bool|clob|decimal|float|int|list|null|sexp|string|struct|symbol|timestamp))?\b}, Name::Builtin
      end

      state :numbers do
        rule %r{0b[01]+(?:_[01]+)*\b}, Literal::Number::Bin
        rule %r{0x\h+(?:_\h+)*\b}, Literal::Number::Hex
        rule %r{(?:nan|[+-]inf)\b}, Literal::Number::Float

        # Float/decimal forms are tried before the plain integer so that the
        # integer rule does not consume only the leading digits.
        integer = %r{-?(?:0|[1-9]\d*(?:_\d+)*)}
        rule %r{#{integer}[.dD][+-]?(?:#{integer})*(?:[dDeE][+-]?#{integer})?}, Literal::Number::Float
        rule %r{#{integer}[dDeE][+-]?#{integer}}, Literal::Number::Float
        rule integer, Literal::Number::Integer
      end

      # Timestamps: a full date with optional time and offset, or a year /
      # year-month followed by `T`.
      state :timestamps do
        year = %r{000[1-9]|00[1-9]\d|0[1-9]\d{2}|[1-9]\d{3}}
        month = %r{0[1-9]|1[0-2]}
        day = %r{0[1-9]|[12]\d|3[01]}
        date = %r{#{year}-#{month}-#{day}}

        hour = %r{[01]\d|2[0-3]}
        minute = %r{[0-5]\d}
        second = %r{[0-5]\d(?:\.\d+)?}
        offset = %r{Z|[+-]#{hour}:#{minute}}
        time = %r{#{hour}:#{minute}(?::#{second})?#{offset}}

        rule %r{#{date}(T#{time}?)?|#{year}(?:-#{month})?T}, Literal::Date
      end

      state :whitespace do
        rule %r{\s+}, Text::Whitespace
      end

      # Contents of `{{ ... }}`: either quoted text or Base64 data.
      state :blob do
        rule %r/}}/, Punctuation::Indicator, :pop!

        rule qqq, Literal::String::Double, :qqq
        mixin :qq
        mixin :whitespace

        # no attempt to validate the Base64 blob
        rule %r{(?:[A-Za-z0-9/\+=]+)}, Literal
      end

      # Openers for every container type. `{{` must be tried before `{`.
      state :containers do
        rule %r/{{/, Punctuation::Indicator, :blob
        rule %r{\[}, Punctuation::Indicator, :list
        rule %r{\(}, Punctuation::Indicator, :sexp
        rule %r/{/, Punctuation::Indicator, :struct
      end

      state :list do
        rule %r{\]}, Punctuation::Indicator, :pop!

        mixin :containers
        mixin :comments
        mixin :annotation
        mixin :whitespace
        mixin :constants
        mixin :timestamps
        mixin :numbers
        mixin :quotes

        rule %r{[,=;]}, Punctuation
      end

      state :sexp do
        rule %r{\)}, Punctuation::Indicator, :pop!
        rule %r{(?:\+\+|--|<<|>>|\&\&|\.\.|\|\||[-+\*/=<>|&$^.#!%?@`~])}, Operator

        mixin :containers
        mixin :comments
        mixin :annotation
        mixin :whitespace
        mixin :constants
        mixin :timestamps
        mixin :numbers
        mixin :quotes
      end

      # Inside `{ ... }` while expecting a field name. A field name followed
      # by `:` switches to :value to lex the field's value.
      state :struct do
        rule %r/}/, Punctuation::Indicator, :pop!

        rule %r{(#{q}|#{qq}|#{symbol})(\s*)(:)} do
          groups Name::Label, Text::Whitespace, Punctuation
          push :value
        end

        mixin :containers
        mixin :comments
        mixin :whitespace
      end

      # A struct field's value. `,` ends the value and returns to :struct;
      # `}` ends both the value and the enclosing struct.
      state :value do
        mixin :containers
        mixin :comments
        mixin :whitespace
        mixin :annotation
        mixin :constants
        mixin :timestamps
        mixin :numbers
        mixin :quotes

        rule %r{,}, Punctuation, :pop!

        # Use the same token type as the closing brace rule in :struct so a
        # struct's `}` is highlighted identically with or without a trailing
        # comma after its last field.
        rule %r/}/ do
          token Punctuation::Indicator
          pop!(2)
        end
      end

      state :root do
        rule %r{\s*\A\$(?:ion_1_0|ion_symbol_table)\b}, Name::Builtin::Pseudo

        mixin :comments
        mixin :annotation
        mixin :constants
        mixin :timestamps
        mixin :numbers
        mixin :containers
        mixin :quotes
        mixin :whitespace
      end
    end
  end
end
199 changes: 199 additions & 0 deletions spec/lexers/ion_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Specs for the Ion lexer: language guessing by filename/mimetype and the
# exact token stream produced for a representative document.
describe Rouge::Lexers::Ion do
let(:subject) { Rouge::Lexers::Ion.new }

# The lexer should be selected from a file name or a mimetype.
describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'file.ion'
end

it 'guesses by mimetype' do
assert_guess :mimetype => 'application/ion'
end
end

describe 'lexing' do
include Support::Lexing

# Lexes one document covering comments, annotations, numbers, timestamps,
# constants (plus look-alike symbols such as `nancy`, `truee`, `nullish`),
# an s-expression, a long string and a blob, then pins the token stream.
# NOTE(review): the expected Text.Whitespace tokens contain spaces after
# newlines, so the heredoc body is presumably indented in the real file --
# confirm that whitespace survived formatting.
it 'handles a basic file' do
ion = <<~ION
/**
* comment
*/
'annotation' :: {
a: 1,
'b': 2.0,
"c": float:: 1.23e-4,
"d": 'null':: null.symbol,
"e": 'timestamps':: [
1970-01-01T00:00Z,
1970T,
],
f: 'constants and such':: [
nancy, nan,
"+infinity", +inf, -inf,
truee, false, true,
null.null, null,
nullish,
],
"g": (add [0b101, 0xdeadbeef, -1_000] "hello"),
h: { i: '''string''', },
j: {{ aW9u }},
}
ION

# No Error tokens may appear anywhere in the output.
assert_no_errors ion

# Expected [token type, text] pairs, in document order.
assert_tokens_equal ion,
["Comment.Multiline", "/**\n * comment\n */"],
["Text.Whitespace", "\n"],
["Name.Decorator", "'annotation'"],
["Text.Whitespace", " "],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{"],
["Text.Whitespace", "\n "],
["Name.Label", "a"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.Number.Integer", "1"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "'b'"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.Number.Float", "2.0"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "\"c\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "float"],
["Operator", "::"],
["Text.Whitespace", " "],
["Literal.Number.Float", "1.23e-4"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "\"d\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'null'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Name.Builtin", "null.symbol"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "\"e\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'timestamps'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Text.Whitespace", "\n "],
["Literal.Date", "1970-01-01T00:00Z"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.Date", "1970T"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Punctuation.Indicator", "]"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "f"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Name.Decorator", "'constants and such'"],
["Operator", "::"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "nancy"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "nan"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Double", "\"+infinity\""],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "+inf"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Float", "-inf"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "truee"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "false"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "true"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Builtin", "null.null"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Name.Builtin", "null"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Literal.String.Symbol", "nullish"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Punctuation.Indicator", "]"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "\"g\""],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "("],
["Literal.String.Symbol", "add"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "["],
["Literal.Number.Bin", "0b101"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Hex", "0xdeadbeef"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Literal.Number.Integer", "-1_000"],
["Punctuation.Indicator", "]"],
["Text.Whitespace", " "],
["Literal.String.Double", "\"hello\""],
["Punctuation.Indicator", ")"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "h"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{"],
["Text.Whitespace", " "],
["Name.Label", "i"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Literal.String.Double", "'''string'''"],
["Punctuation", ","],
["Text.Whitespace", " "],
["Punctuation.Indicator", "}"],
["Punctuation", ","],
["Text.Whitespace", "\n "],
["Name.Label", "j"],
["Punctuation", ":"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "{{"],
["Text.Whitespace", " "],
["Literal", "aW9u"],
["Text.Whitespace", " "],
["Punctuation.Indicator", "}}"],
["Punctuation", ","],
["Text.Whitespace", "\n"],
["Punctuation.Indicator", "}"],
["Text.Whitespace", "\n"]
end
end
end
Loading

0 comments on commit 35e6f20

Please sign in to comment.