-
Notifications
You must be signed in to change notification settings - Fork 194
/
Lexer.java
227 lines (218 loc) · 9.24 KB
/
Lexer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import static java.lang.System.exit;
import java.util.regex.Pattern;
/**
 * Reads a source file into memory and splits it into tokens on demand.
 *
 * <p>The loaded text is kept as one string in which every source line is followed by a
 * two-character end-of-line marker: a literal backslash and the letter {@code n} (not a
 * line-feed character). The scanner uses that marker to count lines and to find the end
 * of {@code //} comments.
 *
 * @author wangjs
 */
public class Lexer {
    // End-of-line marker stored in textContent: a backslash followed by 'n' (two characters).
    private static final String LINE_MARKER = "\\n";
    // Optionally signed run of digits; compiled once instead of on every token request.
    private static final Pattern INTEGER_PATTERN = Pattern.compile("^[-\\+]?[\\d]+$");

    // Directory path shared by every lexer; assigned by the two-argument constructor.
    private static String folderPath;
    // Controls whether the line number may be increased while tokens are consumed.
    public static boolean newLineCheck = true;

    // Encoding used to decode the source file.
    private final String encoding;
    // Full path of the source file.
    private final String path;
    // File object for the source file.
    private final File file;
    // Current line number, starting at 1.
    private int lineNumber;
    // Text of the source file, one LINE_MARKER appended after each line.
    private String textContent;
    // Position in textContent of the next character to scan.
    private int index;

    /**
     * Creates a lexer for a file located in the folder remembered from an earlier call to
     * {@link #Lexer(String, String)}.
     *
     * @param fileName name of the file inside the remembered folder
     */
    public Lexer(String fileName) {
        encoding = "GBK";
        path = folderPath + File.separator + fileName;
        file = new File(path);
        textContent = "";
        lineNumber = 1;
        index = 0;
    }

    /**
     * Creates a lexer for a file in the given folder and remembers that folder in a static
     * field, so it is also used by lexers created later with {@link #Lexer(String)}.
     *
     * @param folderPath directory containing the file
     * @param fileName   name of the file inside {@code folderPath}
     */
    public Lexer(String folderPath, String fileName) {
        encoding = "GBK";
        // Static state: written through the class name rather than through "this".
        Lexer.folderPath = folderPath;
        path = folderPath + File.separator + fileName;
        file = new File(path);
        textContent = "";
        lineNumber = 1;
        index = 0;
    }

    /**
     * Loads the source file into {@code textContent}, appending the two-character line
     * marker after every line read.
     *
     * <p>Prints a message when the file is missing or reading fails; whatever was read
     * before a failure is kept. The streams are closed on every path.
     */
    public void initLocalFile() {
        textContent = "";
        StringBuilder content = new StringBuilder();
        try {
            // Check whether the file is available before opening it.
            if (file.isFile() && file.exists()) {
                try (FileInputStream stream = new FileInputStream(file);
                        BufferedReader reader = new BufferedReader(
                                new InputStreamReader(stream, encoding))) {
                    String lineText;
                    while ((lineText = reader.readLine()) != null) {
                        content.append(lineText).append(LINE_MARKER);
                    }
                }
            } else {
                System.out.println("Cannot find the file");
            }
        } catch (Exception e) {
            System.out.println("Error: class not exists.");
            e.printStackTrace();
        }
        textContent = content.toString();
    }

    /**
     * Returns the folder path shared by all lexers.
     *
     * @return the static folder path; {@code null} until the two-argument constructor ran
     */
    public String getFolderPath() {
        return folderPath;
    }

    /**
     * Moves the scan position.
     *
     * @param value new position in the loaded text
     */
    public void setReadIndex(int value) {
        index = value;
    }

    /**
     * Returns the current scan position.
     *
     * @return position in the loaded text of the next character to scan
     */
    public int getReadIndex() {
        return index;
    }

    /**
     * Returns the loaded text, including the two-character line markers.
     *
     * @return text loaded by {@link #initLocalFile()}, or an empty string before loading
     */
    public String getTextContent() {
        return textContent;
    }

    /**
     * Prints the message and terminates the JVM.
     *
     * @param errorInfor message to print (no line break is added)
     */
    private void error(String errorInfor) {
        System.out.printf("%s", errorInfor);
        // NOTE(review): the process exits with status 0 although this reports an error;
        // kept as is because callers or scripts may depend on it -- confirm and consider 1.
        exit(0);
    }

    // True for the characters that form identifiers, keywords and integer constants.
    private static boolean isWordChar(char c) {
        return Character.isLowerCase(c) || Character.isUpperCase(c) || Character.isDigit(c);
    }

    // Counts the non-overlapping occurrences of the line marker inside text.
    private static int countLineMarkers(String text) {
        int count = 0;
        int position = text.indexOf(LINE_MARKER);
        while (position != -1) {
            count++;
            position = text.indexOf(LINE_MARKER, position + LINE_MARKER.length());
        }
        return count;
    }

    /**
     * Scans forward from the current position and returns the next token, skipping
     * spaces, line markers and comments.
     *
     * <p>The line number only advances when {@code operationType} is {@code "get"} and
     * {@link #newLineCheck} is true; this applies to line markers and to the lines spanned
     * by block comments alike, so peeking never changes the line number.
     *
     * @param operationType {@code "get"} when the token is being consumed, any other value
     *                      (the class uses {@code "peek"}) when it is only inspected
     * @return the token found; a token on which {@code setToken} was never called when no
     *         further token text is found
     */
    private Token TokenOperation(String operationType) {
        Token token = new Token();
        String tokenName = "";
        boolean countLines = newLineCheck && operationType.equals("get");

        // The bound stops two characters early, presumably because text produced by
        // initLocalFile always ends with the two-character line marker.
        while (index < textContent.length() - 2) {
            char current = textContent.charAt(index);

            if (current == ' ') {
                index++;
                continue;
            }

            if (isWordChar(current)) {
                tokenName += current;
                index++;
                // A word ends as soon as the following character is not a word character.
                if (index < textContent.length() && !isWordChar(textContent.charAt(index))) {
                    break;
                }
                continue;
            }

            tokenName = (tokenName + current).trim();
            index++;

            if (!token.Symbols.containsKey(tokenName)) {
                // A backslash followed by 'n' is the line marker: count it and drop it.
                if (tokenName.startsWith("\\") && textContent.charAt(index) == 'n') {
                    if (countLines) {
                        lineNumber++;
                    }
                    tokenName = "";
                    index++;
                }
                continue;
            }

            if (tokenName.equals("/")) {
                // Keep scanning only when the next character turns the slash into a
                // comment opener; otherwise the slash is a complete symbol. Continuing
                // on any other character used to glue the slash to the following text.
                char next = textContent.charAt(index);
                if (next == '/' || next == '*') {
                    continue;
                }
                break;
            }

            if (tokenName.equals("/*")) {
                int commentEnd = textContent.indexOf("*/", index);
                if (commentEnd == -1) {
                    // error() terminates the JVM, so the code below is not reached.
                    error("Error: comment doesn't have the end symbol, line: " + lineNumber);
                }
                if (countLines) {
                    // Count markers directly: splitting on the marker discards trailing
                    // empty pieces and miscounts comments that end right after a line break.
                    lineNumber += countLineMarkers(textContent.substring(index, commentEnd));
                }
                index = commentEnd + 2;
                tokenName = "";
                continue;
            }

            if (tokenName.equals("//")) {
                // Skip to the line marker; the next iteration consumes and counts it.
                int lineEnd = textContent.indexOf(LINE_MARKER, index);
                index = (lineEnd == -1) ? textContent.length() : lineEnd;
                tokenName = "";
                continue;
            }

            if (tokenName.startsWith("/")) {
                // Any other symbol beginning with a slash: keep scanning.
                continue;
            }

            if (tokenName.startsWith("\"")) {
                Token stringToken = new Token();
                int closingQuote = textContent.indexOf("\"", index);
                if (closingQuote != -1) {
                    // The token text includes both quotation marks.
                    tokenName += textContent.substring(index, closingQuote + 1);
                    stringToken.setToken(tokenName, stringToken.Type.String, lineNumber);
                    index = closingQuote + 1;
                    return stringToken;
                }
                break;
            }

            break;
        }

        // Identify the type of the token and return the token.
        if (!tokenName.equals("")) {
            classify(token, tokenName.trim());
        }
        return token;
    }

    // Stores name in token as a symbol, keyword, integer constant or identifier, in that order.
    private void classify(Token token, String name) {
        if (token.Symbols.containsKey(name)) {
            token.setToken(name, token.Type.Symbol, lineNumber);
            return;
        }
        for (String keyword : token.Keywords) {
            if (keyword.equals(name)) {
                token.setToken(name, token.Type.Keyword, lineNumber);
                return;
            }
        }
        if (INTEGER_PATTERN.matcher(name).matches()) {
            token.setToken(name, token.Type.Constant, lineNumber);
        } else {
            token.setToken(name, token.Type.ID, lineNumber);
        }
    }

    /**
     * Returns the next token and advances the scan position past it.
     *
     * @return the next token
     */
    public Token GetNextToken() {
        return TokenOperation("get");
    }

    /**
     * Returns the next token without advancing: the scan position is restored afterwards.
     *
     * @return the next token
     */
    public Token PeekNextToken() {
        int savedIndex = index;
        Token token = TokenOperation("peek");
        index = savedIndex;
        return token;
    }
}