-
Notifications
You must be signed in to change notification settings - Fork 254
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
1,308 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
var x = 10; | ||
function add(x, y) { | ||
return x + y; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
package lexer; | ||
|
||
%% | ||
|
||
%class Lexer | ||
%type Token | ||
%line | ||
%column | ||
%unicode | ||
|
||
%{ | ||
public static class Token { | ||
public enum Type { | ||
VAR, FUNCTION, RETURN, IF, ELSE, WHILE, | ||
|
||
IDENTIFIER, NUMBER, BOOLEAN, | ||
|
||
PLUS, MINUS, MULTIPLY, DIVIDE, MODULO, | ||
ASSIGN, PLUS_ASSIGN, MINUS_ASSIGN, MULTIPLY_ASSIGN, DIVIDE_ASSIGN, | ||
|
||
EQUAL, NOT_EQUAL, LESS_THAN, GREATER_THAN, | ||
LESS_OR_EQUAL, GREATER_OR_EQUAL, | ||
|
||
LOGICAL_AND, LOGICAL_OR, LOGICAL_NOT, | ||
|
||
BITWISE_AND, BITWISE_OR, BITWISE_XOR, | ||
BITWISE_NOT, LEFT_SHIFT, RIGHT_SHIFT, | ||
|
||
LPAREN, RPAREN, LBRACE, RBRACE, | ||
LBRACKET, RBRACKET, COMMA, SEMICOLON, | ||
|
||
ERROR, EOF | ||
} | ||
|
||
public Type type; | ||
public String value; | ||
public int line; | ||
public int column; | ||
|
||
public Token(Type type, String value, int line, int column) { | ||
this.type = type; | ||
this.value = value; | ||
this.line = line; | ||
this.column = column; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format("%s: %s (Line: %d, Column: %d)", | ||
type, value, line, column); | ||
} | ||
} | ||
|
||
private void log(String category, String value) { | ||
System.out.printf("%s: %s\n", category, value); | ||
} | ||
%} | ||
|
||
DIGIT = [0-9] | ||
LETTER = [a-zA-Z_] | ||
IDENTIFIER = {LETTER}({LETTER}|{DIGIT})* | ||
NUMBER = ({DIGIT}+("."{DIGIT}+)?)|("."{DIGIT}+) | ||
SCIENTIFIC_NUMBER = {NUMBER}[eE][-+]?{DIGIT}+ | ||
BOOLEAN = "true"|"false" | ||
WHITESPACE = [ \t\n\r]+ | ||
SINGLE_COMMENT = "//".* | ||
MULTI_COMMENT = "/*"([^*]|\*+[^*/])*\*+"/" | ||
|
||
%% | ||
"var" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.VAR, yytext(), yyline, yycolumn); | ||
} | ||
"function" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.FUNCTION, yytext(), yyline, yycolumn); | ||
} | ||
"return" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.RETURN, yytext(), yyline, yycolumn); | ||
} | ||
"if" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.IF, yytext(), yyline, yycolumn); | ||
} | ||
"else" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.ELSE, yytext(), yyline, yycolumn); | ||
} | ||
"while" { | ||
log("KEYWORD", yytext()); | ||
return new Token(Token.Type.WHILE, yytext(), yyline, yycolumn); | ||
} | ||
|
||
{BOOLEAN} { | ||
log("BOOLEAN", yytext()); | ||
return new Token(Token.Type.BOOLEAN, yytext(), yyline, yycolumn); | ||
} | ||
|
||
{IDENTIFIER} { | ||
log("IDENTIFIER", yytext()); | ||
return new Token(Token.Type.IDENTIFIER, yytext(), yyline, yycolumn); | ||
} | ||
|
||
{NUMBER} { | ||
String category = yytext().contains(".") ? "NUMBER (FLOAT)" : "NUMBER (INTEGER)"; | ||
log(category, yytext()); | ||
return new Token(Token.Type.NUMBER, yytext(), yyline, yycolumn); | ||
} | ||
|
||
{SCIENTIFIC_NUMBER} { | ||
log("NUMBER (SCIENTIFIC)", yytext()); | ||
return new Token(Token.Type.NUMBER, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"," { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.COMMA, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"+" { | ||
log("ARITHMETIC", yytext()); | ||
return new Token(Token.Type.PLUS, yytext(), yyline, yycolumn); | ||
} | ||
"-" { | ||
log("ARITHMETIC", yytext()); | ||
return new Token(Token.Type.MINUS, yytext(), yyline, yycolumn); | ||
} | ||
"*" { | ||
log("ARITHMETIC", yytext()); | ||
return new Token(Token.Type.MULTIPLY, yytext(), yyline, yycolumn); | ||
} | ||
"/" { | ||
log("ARITHMETIC", yytext()); | ||
return new Token(Token.Type.DIVIDE, yytext(), yyline, yycolumn); | ||
} | ||
"%" { | ||
log("ARITHMETIC", yytext()); | ||
return new Token(Token.Type.MODULO, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"==" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.EQUAL, yytext(), yyline, yycolumn); | ||
} | ||
"!=" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.NOT_EQUAL, yytext(), yyline, yycolumn); | ||
} | ||
"<" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.LESS_THAN, yytext(), yyline, yycolumn); | ||
} | ||
">" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.GREATER_THAN, yytext(), yyline, yycolumn); | ||
} | ||
"<=" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.LESS_OR_EQUAL, yytext(), yyline, yycolumn); | ||
} | ||
">=" { | ||
log("COMPARISON", yytext()); | ||
return new Token(Token.Type.GREATER_OR_EQUAL, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"&&" { | ||
log("LOGICAL", yytext()); | ||
return new Token(Token.Type.LOGICAL_AND, yytext(), yyline, yycolumn); | ||
} | ||
"||" { | ||
log("LOGICAL", yytext()); | ||
return new Token(Token.Type.LOGICAL_OR, yytext(), yyline, yycolumn); | ||
} | ||
"!" { | ||
log("LOGICAL", yytext()); | ||
return new Token(Token.Type.LOGICAL_NOT, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"&" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.BITWISE_AND, yytext(), yyline, yycolumn); | ||
} | ||
"|" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.BITWISE_OR, yytext(), yyline, yycolumn); | ||
} | ||
"^" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.BITWISE_XOR, yytext(), yyline, yycolumn); | ||
} | ||
"~" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.BITWISE_NOT, yytext(), yyline, yycolumn); | ||
} | ||
"<<" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.LEFT_SHIFT, yytext(), yyline, yycolumn); | ||
} | ||
">>" { | ||
log("BITWISE", yytext()); | ||
return new Token(Token.Type.RIGHT_SHIFT, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"(" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.LPAREN, yytext(), yyline, yycolumn); | ||
} | ||
")" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.RPAREN, yytext(), yyline, yycolumn); | ||
} | ||
"{" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.LBRACE, yytext(), yyline, yycolumn); | ||
} | ||
"}" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.RBRACE, yytext(), yyline, yycolumn); | ||
} | ||
"[" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.LBRACKET, yytext(), yyline, yycolumn); | ||
} | ||
"]" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.RBRACKET, yytext(), yyline, yycolumn); | ||
} | ||
|
||
"=" { | ||
log("ASSIGNMENT", yytext()); | ||
return new Token(Token.Type.ASSIGN, yytext(), yyline, yycolumn); | ||
} | ||
"+=" { | ||
log("ASSIGNMENT", yytext()); | ||
return new Token(Token.Type.PLUS_ASSIGN, yytext(), yyline, yycolumn); | ||
} | ||
"-=" { | ||
log("ASSIGNMENT", yytext()); | ||
return new Token(Token.Type.MINUS_ASSIGN, yytext(), yyline, yycolumn); | ||
} | ||
"*=" { | ||
log("ASSIGNMENT", yytext()); | ||
return new Token(Token.Type.MULTIPLY_ASSIGN, yytext(), yyline, yycolumn); | ||
} | ||
"/=" { | ||
log("ASSIGNMENT", yytext()); | ||
return new Token(Token.Type.DIVIDE_ASSIGN, yytext(), yyline, yycolumn); | ||
} | ||
|
||
";" { | ||
log("DELIMITER", yytext()); | ||
return new Token(Token.Type.SEMICOLON, yytext(), yyline, yycolumn); | ||
} | ||
|
||
{WHITESPACE} { /* 忽略空白 */ } | ||
{SINGLE_COMMENT} { /* 忽略单行注释 */ } | ||
{MULTI_COMMENT} { /* 忽略多行注释 */ } | ||
|
||
. { | ||
log("UNEXPECTED CHARACTER", yytext()); | ||
return new Token(Token.Type.ERROR, yytext(), yyline, yycolumn); | ||
} | ||
|
||
<<EOF>> { return null; } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// 赋值 左括号 右括号 左大括号 右大括号 加号 分号 逗号 | ||
const ASSIGN = 'ASSIGN', LPAREN = 'LPAREN', RPAREN = 'RPAREN', LBRACE = 'LBRACE', RBRACE = 'RBRACE', PLUS = 'PLUS', SEMICOLON = 'SEMICOLON', COMMA = 'COMMA'; | ||
// var 标识符 数字 函数 | ||
const VAR = 'VAR', IDENTIFIER = 'IDENTIFIER', NUMBER = 'NUMBER', FUNCTION = 'FUNCTION'; | ||
function lex(input) { | ||
let tokens = [] | ||
let position = 0 | ||
while (position < input.length) { | ||
switch (input[position]) { | ||
// 有特殊作用的单个字符 | ||
case '=': | ||
tokens.push({type: ASSIGN, value: '='}); position++; break; | ||
case '(': | ||
tokens.push({type: LPAREN, value: '('}); position++; break; | ||
case ')': | ||
tokens.push({type: RPAREN, value: ')'}); position++; break; | ||
case '{': | ||
tokens.push({type: LBRACE, value: '{'}); position++; break; | ||
case '}': | ||
tokens.push({type: RBRACE, value: '}'}); position++; break; | ||
case '+': | ||
tokens.push({type: PLUS, value: '+'}); position++; break; | ||
case ';': | ||
tokens.push({type: SEMICOLON, value: ';'}); position++; break; | ||
case ',': | ||
tokens.push({type: COMMA, value: ','}); position++; break; | ||
// 空格 tab 换行跳过即可,不需要解析 | ||
case ' ': | ||
case '\t': | ||
case '\r': | ||
case '\n': | ||
position++; break; | ||
// 剩下数字或字母 | ||
default: | ||
let start = position | ||
// 数字类型 | ||
while (input[position] >= '0' && input[position] <= '9') { | ||
position++ | ||
} | ||
if (start != position) { | ||
tokens.push({type: NUMBER, value: input.substring(start, position)}) | ||
continue; | ||
} | ||
// 字母类型 | ||
//// 首字符必须为字母下划线,后面的可以是数字 | ||
if (input[position] >= 'a' && input[position] <= 'z' || input[position] >= 'A' && input[position] <= 'Z' || input[position] == '_') { | ||
do { | ||
position++ | ||
} while (input[position] >= '0' && input[position] <= '9' || input[position] >= 'a' && input[position] <= 'z' || input[position] >= 'A' && input[position] <= 'Z' || input[position] == '_') | ||
tokens.push({type: IDENTIFIER, value: input.substring(start, position)}) | ||
continue; | ||
} | ||
// 不认识的字符抛出异常 | ||
throw new Error('unexpected input'); | ||
} | ||
} | ||
return tokens | ||
} | ||
|
||
let input = `var x = 10; | ||
function add(x, y) { | ||
return x + y; | ||
}` | ||
|
||
|
||
console.log(lex(input)) |
Oops, something went wrong.