-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer_regex.lex
105 lines (105 loc) · 1.54 KB
/
lexer_regex.lex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Define the regex for all the tokens
// Each line is a key-value pair in the format KEY : {value}. The key is the token name;
// the value (inside the braces) defines the regex for that token.
// Character classes
// [1-9] - Digits 1 - 9
// [0-9] - Digits 0 - 9
// [a-z] - Lower Case a - z
// [A-Z] - Upper Case A - Z
// [abc] - letters a, b, c
// [E-L] - End of Line, [\n]
// [E-F] - End of File, [EOF]
// [W-S] - White Space
// TODO: Nested comments
// DEFINITIONS section: one "NAME : {regex}" entry per line, mapping a token name to
// the pattern that matches it. The special classes used below ([E-L], [E-F], [W-S])
// are the ones listed in the header comment of this file.
DEFINITIONS:
// Keywords
// NOTE(review): CLASS is the only keyword that also accepts a capitalised spelling;
// every other keyword below matches its all-lowercase form only. Confirm that this
// asymmetry is intended.
CLASS : {(class|Class)}
ELSE : {else}
FALSE : {false}
FI : {fi}
IF : {if}
IN : {in}
INHERITS : {inherits}
ISVOID : {isvoid}
LET : {let}
LOOP : {loop}
POOL : {pool}
THEN : {then}
WHILE : {while}
CASE : {case}
ESAC : {esac}
NEW : {new}
OF : {of}
NOT : {not}
TRUE : {true}
// Operators
// NOTE(review): '+' and '-' are written unescaped in PLUS and MINUS, whereas '*', '.',
// the parentheses and the braces are backslash-escaped and LARROW escapes its '-'.
// Confirm that the regex engine reads a bare '+' / '-' as a literal character.
PLUS : {+}
MINUS : {-}
TIMES : {\*}
DIVIDE : {/}
LT : {<}
LE : {<=}
EQUALS : {=}
LARROW : {<\-}
RARROW : {=>}
LBRACE : {\{}
RBRACE : {\}}
LPAREN : {\(}
RPAREN : {\)}
COLON : {:}
SEMI : {;}
DOT : {\.}
COMMA : {,}
TILDE : {~}
AT : {@}
// Literals and names
// INTEGER: either a lone 0, or a digit 1-9 followed by any number of digits 0-9.
// NOTE(review): as written, a literal with leading zeros (e.g. 007) is not matched as
// a single INTEGER - confirm this is the intended behaviour.
INTEGER : {(0|[1-9]([0-9]*))}
// IDENTIFIER: a lower-case letter followed by any mix of letters, digits and '_'.
// TYPE: same tail, but the first character is an upper-case letter.
// NOTE(review): the lower-case keywords and "self" also fit IDENTIFIER, and "Class"
// and "SELF_TYPE" also fit TYPE. How such overlaps are resolved is not visible in this
// file - presumably the consumer of this file decides; verify.
IDENTIFIER : {[a-z]([A-Za-z0-9_]*)}
TYPE : {[A-Z]([A-Za-z_0-9]*)}
SELF_IDENTIFIER : {self}
SELF_TYPE : {SELF_TYPE}
// STRING: a double quote, then any run of (backslash + one character | white space |
// any character other than backslash or double quote), then a closing double quote.
// Presumably '.' means "any character" in this dialect - TODO confirm.
STRING : {"((\\.|[W-S]|[^\\"])*)"}
// COMMENT_LINE: "--" followed by any run of characters that are neither end-of-line
// nor end-of-file, terminated by an end-of-line or an end-of-file.
COMMENT_LINE : {--(([^E-LE-F])*)([E-L]|[E-F])}
// Only the block-comment delimiters "(*" and "*)" are defined as tokens here.
// Open question from the original author: should pairing them up (and nesting, see the
// TODO in the file header) be handled in the next part of the lexer?
COMMENT_BLOCK_START : {\(\*}
COMMENT_BLOCK_END : {\*\)}
// WS: a single white-space character (the [W-S] class).
WS : {[W-S]}
// The KEYWORDS and SYMBOLS sections that follow are plain lists of token names, one per
// line. Every name listed is defined in the DEFINITIONS section above: KEYWORDS repeats
// the entries under "// Keywords" and SYMBOLS repeats the entries under "// Operators".
// Presumably the lists tell the lexer which defined tokens to classify as keywords and
// as symbols - verify against the code that reads this file.
// No comments are placed inside the lists themselves, because this file only shows
// comment lines before and inside DEFINITIONS.
KEYWORDS:
CLASS
ELSE
FALSE
FI
IF
IN
INHERITS
ISVOID
LET
LOOP
POOL
THEN
WHILE
CASE
ESAC
NEW
OF
NOT
TRUE
SYMBOLS:
PLUS
MINUS
TIMES
DIVIDE
LT
LE
EQUALS
LARROW
RARROW
LBRACE
RBRACE
LPAREN
RPAREN
COLON
SEMI
DOT
COMMA
TILDE
AT