-
Notifications
You must be signed in to change notification settings - Fork 1
/
lex.scm
189 lines (148 loc) · 5.24 KB
/
lex.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
; Uncomment these two lines if you are using racket instead of scheme
#lang racket
(provide (all-defined-out))
;;===============================================================
;; The Lexical Analyzer
;=======================================
; get and unget the next symbol from the lexical analyzer
; A 1 symbol buffer is used so the last read symbol can be pushed
; back to the analyzer
(define last-symbol-saved #f) ; is there a symbol buffered?
(define saved-symbol #f) ; the symbol buffer
; gets the next symbol to be processed
;
(define get-next-symbol
(lambda ()
(if last-symbol-saved
(begin
(set! last-symbol-saved #f)
saved-symbol)
(begin
(set! saved-symbol (lex))
(set! last-symbol-saved #f)
saved-symbol))))
; mark the last symbol sent as unread so that it can be read again
;
(define unget-next-symbol
(lambda ()
(begin
(set! last-symbol-saved #t))))
;; A 1 character buffer is used so the last read character can be
;; pushed back
(define saved-last-char #f) ; is the last character buffered?
(define last-read-char #f) ; the character buffer
; read the next character from the file
(define readchar
(lambda (port)
(if saved-last-char
(begin
(set! saved-last-char #f)
last-read-char)
(read-char port))))
; unread the last character from the file so it can be read again
(define unreadchar
(lambda (lastchar port)
(begin (set! last-read-char lastchar)
(set! saved-last-char #t))))
; save the port to the input file
(define file-port '())
; open the input file
(define start-lex
(lambda (filename)
(set! file-port (open-input-file filename))))
; close the input file
(define end-lex
(lambda ()
(close-input-port file-port)))
; the current list of reserved words and operator characters
(define reserved-word-list '(if else return while break continue class extends new throw catch finally try static var true false function))
(define reserved-operator-list '(#\= #\< #\> #\! #\+ #\* #\/ #\- #\% #\& #\| #\!))
; return a lexeme with the next read symbol
(define return-id-lex
(lambda (id)
(if (memq id reserved-word-list)
(if (or (eq? id 'false) (eq? id 'true))
(cons 'BOOLEAN id)
(cons id '()))
(cons 'ID id))))
(define return-num-lex
(lambda (value)
(cons 'NUMBER value)))
(define return-symbol-lex
(lambda (symbol)
(cons 'BINARY-OP symbol)))
;(define return-add-lex
; (lambda (symbol)
; (cons 'MATHOP symbol)))
(define return-left-paren-lex
(lambda ()
(cons 'LEFTPAREN '())))
(define return-right-paren-lex
(lambda ()
(cons 'RIGHTPAREN '())))
(define return-assign-lex
(lambda (symbol)
(cons 'ASSIGN symbol)))
(define return-null-lex
(lambda (symbol)
(cons 'UNKNOWN symbol)))
(define return-semicolon-lex
(lambda ()
(cons 'SEMICOLON '())))
(define return-leftbrace-lex
(lambda ()
(cons 'LEFTBRACE '())))
(define return-rightbrace-lex
(lambda ()
(cons 'RIGHTBRACE '())))
(define return-comma-lex
(lambda ()
(cons 'COMMA '())))
(define return-eof-lex
(lambda ()
(cons 'EOF '())))
(define return-period-lex
(lambda ()
(cons 'BINARY-OP 'dot)))
; The lexical analyer. Keep reading characters until the next symbol is found.
; then return that symbol
(define lex
(lambda ()
(let ((nextchar (readchar file-port)))
(cond ((eof-object? nextchar) (return-eof-lex))
((char-whitespace? nextchar) (lex))
((char-alphabetic? nextchar) (return-id-lex (string->symbol (id-lex file-port (make-string 1 nextchar)))))
((char-numeric? nextchar) (return-num-lex (num-lex file-port (addtointeger 0 nextchar))))
((memq nextchar reserved-operator-list) (return-symbol-lex (string->symbol (symbol-lex file-port (make-string 1 nextchar)))))
((char=? #\( nextchar) (return-left-paren-lex))
((char=? #\) nextchar) (return-right-paren-lex))
((char=? #\; nextchar) (return-semicolon-lex))
((char=? #\{ nextchar) (return-leftbrace-lex))
((char=? #\} nextchar) (return-rightbrace-lex))
((char=? #\, nextchar) (return-comma-lex))
((char=? #\. nextchar) (return-period-lex))
(else (return-null-lex nextchar))))))
(define id-lex
(lambda (fport idstring)
(let ((nextchar (readchar fport)))
(if (or (char-alphabetic? nextchar) (char-numeric? nextchar) (char=? #\_ nextchar))
(id-lex fport (string-append idstring (make-string 1 nextchar)))
(begin (unreadchar nextchar fport)
idstring)))))
(define addtointeger
(lambda (val nextdigit)
(+ (* val 10) (- (char->integer nextdigit) (char->integer #\0)))))
(define num-lex
(lambda (fport value)
(let ((nextchar (readchar fport)))
(if (char-numeric? nextchar)
(num-lex fport (addtointeger value nextchar))
(begin (unreadchar nextchar fport)
value)))))
(define symbol-lex
(lambda (fport idstring)
(let ((nextchar (readchar fport)))
(if (memq nextchar reserved-operator-list)
(symbol-lex fport (string-append idstring (make-string 1 nextchar)))
(begin (unreadchar nextchar fport)
idstring)))))