-
Notifications
You must be signed in to change notification settings - Fork 0
/
lex.ml
366 lines (334 loc) · 11.1 KB
/
lex.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
type pos_t = string * int * int * int
let show_pos_t (fname, l, c, _) = Printf.sprintf "%s:%d:%d" fname l c
let pp_pos_t f p = Format.pp_print_string f (show_pos_t p)
let string_of_pos_t (file, line, col, _) =
Printf.sprintf "%s:%d:%d" file col line
let nowhere = ("", -1, -1, -1)
let initial_pos fname = (fname, 1, 0, 0)
type t =
| Str of string
| Int of int
| UIdent of string
| Char of char
| LIdent of string
| TVar of string
| TInt
| TBool
| TString
| Type
| Of
| Dot
| True
| False
| Add
| Sub
| Mul
| Div
| Mod
| Cons
| Gret
| Less
| Eq
| Deref
| Neq
| And
| Or
| Pipeline
| AtAt
| Assign
| ArrayAssign
| LP
| RP
| LB
| RB
| Semicol
| Comma
| VBar
| Arrow
| Fun
| If
| Then
| Else
| Let
| In
| Rec
| AndDef
| Match
| When
| With
| Not
| Ref
| As
| Eof
let rec show = function
| Str s -> Printf.sprintf "Str(%s)" s
| Int n -> Printf.sprintf "Int(%d)" n
| Char c -> Printf.sprintf "Char(%c)" c
| True -> "true"
| False -> "false"
| TInt -> "int"
| TBool -> "bool"
| TString -> "string"
| Type -> "type"
| Of -> "of"
| Dot -> "."
| Add -> "+"
| Sub -> "-"
| Mul -> "*"
| Div -> "/"
| Mod -> "mod"
| Cons -> "::"
| Gret -> ">"
| Less -> "<"
| Eq -> "="
| Deref -> "!"
| Neq -> "<>"
| And -> "&&"
| Or -> "||"
| Pipeline -> "|>"
| AtAt -> "@@"
| Assign -> ":="
| ArrayAssign -> ":=(array)"
| LP -> "("
| RP -> ")"
| LB -> "["
| RB -> "]"
| Semicol -> ";"
| Comma -> ","
| VBar -> "|"
| Arrow -> "->"
| Fun -> "fun"
| If -> "if"
| Then -> "then"
| Else -> "else"
| Let -> "let"
| In -> "in"
| Rec -> "rec"
| AndDef -> "and"
| Match -> "match"
| When -> "when"
| With -> "with"
| Not -> "not"
| Ref -> "ref"
| As -> "as"
| Eof -> "<EOF>"
| UIdent id -> id
| LIdent id -> id
| TVar id -> Printf.sprintf "'%s" id
let count_newline s =
let rec cnt acc i_begin i_end =
if i_begin == i_end then acc
else if s.[i_begin] = '\n' then cnt (acc + 1) (i_begin + 1) i_end
else cnt acc (i_begin + 1) i_end
in
cnt 0
let match_space_char s i =
if i >= String.length s then None
else if s.[i] = ' ' || s.[i] = '\t' || s.[i] = '\n' || s.[i] = '\r' then
Some (i + 1)
else None
let match_alph_char s i =
if i >= String.length s then None
else
let code = Char.code s.[i] in
if (code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a) then
Some (i + 1)
else None
let match_alph_lower_char s i =
if i >= String.length s then None
else
let code = Char.code s.[i] in
if code >= 0x61 && code <= 0x7a then Some (i + 1) else None
let match_alph_upper_char s i =
if i >= String.length s then None
else
let code = Char.code s.[i] in
if code >= 0x41 && code <= 0x5a then Some (i + 1) else None
let match_num_char s i =
if i >= String.length s then None
else
let code = Char.code s.[i] in
if code >= 0x30 && code <= 0x39 then Some (i + 1) else None
let match_hexnum_char s i =
if i >= String.length s then None
else
let code = Char.code s.[i] in
if
(code >= 0x30 && code <= 0x39)
|| (code >= 0x41 && code <= 0x46)
|| (code >= 0x61 && code <= 0x66)
then Some (i + 1)
else None
let match_char c s i =
if i >= String.length s then None
else if c = s.[i] then Some (i + 1)
else None
let match_str pat s i =
if i + String.length pat > String.length s then None
else if pat = String.sub s i (String.length pat) then
Some (i + String.length pat)
else None
let opt pat s =
let rec f acc i =
match (acc, pat s i) with
| _, Some res -> f (Some res) (i + 1)
| before, None -> before
in
f None
let star pat s i =
let rec f acc i =
match (acc, pat s i) with
| _, Some res -> f (Some res) (i + 1)
| before, None -> before
in
f (Some i) i
let comb_or pat1 pat2 s i =
match pat1 s i with Some i -> Some i | None -> pat2 s i
let match_strlit s i =
let rec f i =
if i >= String.length s then None
else if i + 1 < String.length s && s.[i] = '\\' && s.[i + 1] = '\"' then
f (i + 2)
else if s.[i] = '"' then Some (i + 1)
else f (i + 1)
in
if i + 1 >= String.length s then None
else if s.[i] = '"' then f (i + 1)
else None
let match_charlit s i =
if
i + 3 < String.length s
&& s.[i] = '\''
&& s.[i + 1] == '\\'
&& s.[i + 3] == '\''
then Some (i + 4)
else if i + 2 < String.length s && s.[i] = '\'' && s.[i + 2] == '\'' then
Some (i + 3)
else None
let chain pat1 pat2 s i =
match pat1 s i with Some i -> pat2 s i | None -> None
let match_space = opt match_space_char
let match_lower_ident =
chain match_alph_lower_char
@@ star (comb_or match_alph_char (comb_or match_num_char (match_char '_')))
let match_upper_ident =
chain match_alph_upper_char
@@ star (comb_or match_alph_char (comb_or match_num_char (match_char '_')))
let match_tvar = chain (match_char '\'') match_lower_ident
let match_int = opt match_num_char
let match_hexint = chain (match_str "0x") (opt match_hexnum_char)
let update_pos s (fname, line, col, i_start) i_end =
let rec f (col, el_cnt) i =
if i >= i_end then (col, el_cnt)
else if s.[i] = '\n' then f (0, el_cnt + 1) (i + 1)
else f (col + 1, el_cnt) (i + 1)
in
let col, el_cnt = f (col, 0) i_start in
(fname, line + el_cnt, col, i_end)
exception LexException of pos_t
let rec lex s pos =
let (_, _, _, i) = pos in
let take e = String.sub s i (e-i) in
if i = String.length s
then [Eof, pos]
else
let pos = update_pos s pos in
match match_strlit s i with
| Some i' ->
let inner = take i' in
(Str (String.sub inner 1 ((String.length inner) - 2)), pos (i+1)) :: lex s (pos i')
| None -> match match_charlit s i with
| Some i' ->
let inner = take i' in
(Char (inner.[(String.length inner) - 2]), pos (i+1)) :: lex s (pos i')
| None -> match match_tvar s i with
| Some i' -> let inner = take i' in
(TVar (String.sub inner 1 ((String.length inner) - 1)), pos (i+1)) :: lex s (pos i')
| None -> match match_space s i with
| Some i' -> lex s (pos (i+1))
| None -> match match_hexint s i with
| Some i' ->
(Int (int_of_string @@ take i'), pos (i+1)) :: lex s (pos i')
| None -> match match_int s i with
| Some i' ->
(Int (int_of_string @@ take i'), pos (i+1)) :: lex s (pos i')
(* 愚直すぎ (末尾再帰の最適化を狙っています。許して) *)
| None -> match match_str "." s i with
| Some i' -> (Dot, pos (i+1)) :: lex s (pos i')
| None -> match match_str "::" s i with
| Some i' -> (Cons, pos (i+1)) :: lex s (pos i')
| None -> match match_str "->" s i with
| Some i' -> (Arrow, pos (i+1)) :: lex s (pos i')
| None -> match match_str "<-" s i with
| Some i' -> (ArrayAssign, pos (i+1)) :: lex s (pos i')
| None -> match match_str ":=" s i with
| Some i' -> (Assign, pos (i+1)) :: lex s (pos i')
| None -> match match_str "+" s i with
| Some i' -> (Add, pos (i+1)) :: lex s (pos i')
| None -> match match_str "-" s i with
| Some i' -> (Sub, pos (i+1)) :: lex s (pos i')
| None -> match match_str "*" s i with
| Some i' -> (Mul, pos (i+1)) :: lex s (pos i')
| None -> match match_str "/" s i with
| Some i' -> (Div, pos (i+1)) :: lex s (pos i')
| None -> match match_str "|>" s i with
| Some i' -> (Pipeline, pos (i+1)) :: lex s (pos i')
| None -> match match_str "@@" s i with
| Some i' -> (AtAt, pos (i+1)) :: lex s (pos i')
| None -> match match_str "||" s i with
| Some i' -> (Or, pos (i+1)) :: lex s (pos i')
| None -> match match_str "&&" s i with
| Some i' -> (And, pos (i+1)) :: lex s (pos i')
| None -> match match_str "=" s i with
| Some i' -> (Eq, pos (i+1)) :: lex s (pos i')
| None -> match match_str "!" s i with
| Some i' -> (Deref, pos (i+1)) :: lex s (pos i')
| None -> match match_str "<>" s i with
| Some i' -> (Neq, pos (i+1)) :: lex s (pos i')
| None -> match match_str ">" s i with
| Some i' -> (Gret, pos (i+1)) :: lex s (pos i')
| None -> match match_str "<" s i with
| Some i' -> (Less, pos (i+1)) :: lex s (pos i')
| None -> match match_str "(" s i with
| Some i' -> (LP, pos (i+1)) :: lex s (pos i')
| None -> match match_str ")" s i with
| Some i' -> (RP, pos (i+1)) :: lex s (pos i')
| None -> match match_str "[" s i with
| Some i' -> (LB, pos (i+1)) :: lex s (pos i')
| None -> match match_str "]" s i with
| Some i' -> (RB, pos (i+1)) :: lex s (pos i')
| None -> match match_str ";" s i with
| Some i' -> (Semicol, pos (i+1)) :: lex s (pos i')
| None -> match match_str "," s i with
| Some i' -> (Comma, pos (i+1)) :: lex s (pos i')
| None -> match match_str "|" s i with
| Some i' -> (VBar, pos (i+1)) :: lex s (pos i')
| None -> match match_upper_ident s i with
| Some i' -> (UIdent (take i'), pos (i+1)) :: lex s (pos i')
| None -> match match_lower_ident s i with
| Some i' -> begin match take i' with
| "and" -> (AndDef, pos (i+1)) :: lex s (pos i')
| "type" -> (Type, pos (i+1)) :: lex s (pos i')
| "of" -> (Of, pos (i+1)) :: lex s (pos i')
| "as" -> (As, pos (i+1)) :: lex s (pos i')
| "int" -> (TInt, pos (i+1)) :: lex s (pos i')
| "bool" -> (TBool, pos (i+1)) :: lex s (pos i')
| "string" -> (TString, pos (i+1)) :: lex s (pos i')
| "true" -> (True, pos (i+1)) :: lex s (pos i')
| "false" -> (False, pos (i+1)) :: lex s (pos i')
| "if" -> (If, pos (i+1)) :: lex s (pos i')
| "then" -> (Then, pos (i+1)) :: lex s (pos i')
| "else" -> (Else, pos (i+1)) :: lex s (pos i')
| "let" -> (Let, pos (i+1)) :: lex s (pos i')
| "rec" -> (Rec, pos (i+1)) :: lex s (pos i')
| "in" -> (In, pos (i+1)) :: lex s (pos i')
| "fun" -> (Fun, pos (i+1)) :: lex s (pos i')
| "match" -> (Match, pos (i+1)) :: lex s (pos i')
| "when" -> (When, pos (i+1)) :: lex s (pos i')
| "with" -> (With, pos (i+1)) :: lex s (pos i')
| "mod" -> (Mod, pos (i+1)) :: lex s (pos i')
| ident -> (LIdent ident, pos (i+1)) :: lex s (pos i')
end
| None -> Printf.printf "lex: %s %d\n" s i; raise (LexException (pos 0))
[@@ocamlformat "disable"]
let f fname src = lex src @@ initial_pos fname