-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.rb
118 lines (95 loc) · 2.61 KB
/
lexer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Indentation-aware lexer that converts source text into a flat array of
# [type, value] token pairs.
#
# Block structure is expressed through :INDENT / :DEDENT tokens derived from
# the number of spaces following each newline.
#
# NOTE: lexer state (@position, @tokens, @current_indent, @indent_stack) is
# initialised once in #initialize and is not reset by #tokenize, so an
# instance continues from its previous state if #tokenize is called again.
class Lexer
  attr_accessor :position, :tokens, :current_indent, :indent_stack

  # Reserved words. A lowercase identifier found in this list is emitted with
  # its upcased name as the token type (e.g. "if" => :IF).
  KEYWORDS = %w[class def if while true false nil or and].freeze

  def initialize
    @position = 0
    @tokens = []
    @current_indent = 0
    @indent_stack = []
  end

  # Scans +code+ from the current position and appends tokens to #tokens.
  #
  # @param code [String] source text; a single trailing line terminator is
  #   ignored. The argument itself is never modified.
  # @return [Array<Array>] the accumulated [type, value] pairs
  def tokenize(code)
    # Work on a chomped copy: the destructive chomp! would mutate the
    # caller's string and raise FrozenError for frozen input.
    code = code.chomp

    while @position < code.size
      chunk = code[@position..-1]

      # Order matters: the first pattern that matches at the start of the
      # remaining input decides the token.
      if (identifier = chunk[/\A([a-z]\w*)/, 1])
        tokenize_identifier(identifier)
      elsif (constant = chunk[/\A([A-Z]\w*)/, 1])
        tokenize_constant(constant)
      elsif (number = chunk[/\A([0-9]+)/, 1])
        tokenize_number(number)
      elsif (string = chunk[/\A"(.*?)"/, 1])
        tokenize_string(string)
      elsif (array = chunk[/\A\[(.*?)\]/, 1])
        tokenize_array(array)
      elsif (indent = chunk[/\A\n( *)/m, 1])
        # The capture may be an empty string, which is still truthy in Ruby,
        # so a newline with no following spaces is handled here too.
        tokenize_indent(indent)
      elsif (operator = chunk[/\A(\|\||&&|==|!=|<=|>=|=>)/, 1])
        tokenize_operator(operator)
      elsif chunk.start_with?(" ")
        remove_whitespace
      else
        # Anything else becomes a one-character token.
        tokenize_symbol(chunk[0, 1])
      end
    end

    finalize_dedent
    @tokens
  end

  private

  # Emits a keyword token (type is the upcased word as a symbol) or a generic
  # :IDENTIFIER token, then advances past the word.
  def tokenize_identifier(identifier)
    @tokens <<
      if KEYWORDS.include?(identifier)
        [identifier.upcase.to_sym, identifier]
      else
        [:IDENTIFIER, identifier]
      end
    @position += identifier.size
  end

  # Emits a :CONSTANT token for a capitalised word.
  def tokenize_constant(constant)
    @tokens << [:CONSTANT, constant]
    @position += constant.size
  end

  # Emits a :NUMBER token whose value is the integer value of the digits.
  def tokenize_number(number)
    @tokens << [:NUMBER, number.to_i]
    @position += number.size
  end

  # Emits a :STRING token holding the text between the double quotes.
  def tokenize_string(string)
    @tokens << [:STRING, string]
    @position += string.size + 2 # +2 skips the surrounding quotes
  end

  # Emits an :ARRAY token holding the raw text between the brackets.
  def tokenize_array(array)
    @tokens << [:ARRAY, array]
    @position += array.size + 2 # +2 skips the surrounding brackets
  end

  # Handles a newline followed by +indent+ (a run of zero or more spaces).
  #
  # * deeper than the current level  -> push the level, emit :INDENT
  # * same as the current level      -> emit :NEWLINE
  # * shallower                      -> pop levels, emitting one :DEDENT per
  #   popped level (each carrying the new indent width), then :NEWLINE
  def tokenize_indent(indent)
    width = indent.size

    if width > @current_indent
      @current_indent = width
      @indent_stack.push(@current_indent)
      @tokens << [:INDENT, width]
    elsif width == @current_indent
      @tokens << [:NEWLINE, "\n"]
    else
      while width < @current_indent
        @indent_stack.pop
        @current_indent = @indent_stack.last || 0
        @tokens << [:DEDENT, width]
      end
      @tokens << [:NEWLINE, "\n"]
    end

    @position += width + 1 # +1 skips the newline itself
  end

  # Emits a multi-character operator token; type and value are both the
  # operator text.
  def tokenize_operator(operator)
    @tokens << [operator, operator]
    @position += operator.size
  end

  # Emits a single-character token; type and value are both the character.
  def tokenize_symbol(symbol)
    @tokens << [symbol, symbol]
    @position += 1
  end

  # Skips one space character without emitting a token.
  def remove_whitespace
    @position += 1
  end

  # Closes every indentation level still open at end of input by emitting one
  # :DEDENT per remaining stack entry. The value is the first (outermost)
  # level still on the stack after the pop, or 0 once the stack is empty.
  def finalize_dedent
    @tokens << [:DEDENT, @indent_stack.first || 0] while @indent_stack.pop
  end
end