Skip to content

Commit

Permalink
✨ ADD parser语法分析(未完成)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunwu51 committed Dec 16, 2024
1 parent f83054b commit 7f8b63b
Show file tree
Hide file tree
Showing 4 changed files with 905 additions and 93 deletions.
98 changes: 48 additions & 50 deletions 24.11/lex.js → 24.11/lex.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
const ASSIGN = 'ASSIGN', LPAREN = 'LPAREN', RPAREN = 'RPAREN', LBRACE = 'LBRACE', RBRACE = 'RBRACE', LBRACKET = 'LBRACKET', RBRACKET = 'RBRACKET',
// Token type tags for punctuation and operators.
// Every constant in this group holds its own name as a string.

// Assignment and bracket pairs.
export const ASSIGN = 'ASSIGN';
export const LPAREN = 'LPAREN';
export const RPAREN = 'RPAREN';
export const LBRACE = 'LBRACE';
export const RBRACE = 'RBRACE';
export const LBRACKET = 'LBRACKET';
export const RBRACKET = 'RBRACKET';

// Separators and arithmetic operators.
export const SEMICOLON = 'SEMICOLON';
export const COMMA = 'COMMA';
export const PLUS = 'PLUS';
export const MINUS = 'MINUS';
export const MULTIPLY = 'MULTIPLY';
export const DIVIDE = 'DIVIDE';
export const MODULUS = 'MODULUS';

// The '.' character.
export const POINT = 'POINT';

// Logical and comparison operators.
export const AND = 'AND';
export const OR = 'OR';
export const NOT = 'NOT';
export const GT = 'GT';
export const LT = 'LT';
export const GTE = 'GTE';
export const LTE = 'LTE';
export const NEQ = 'NEQ';

// Bitwise operators and shifts.
export const BAND = 'BAND';
export const BOR = 'BOR';
export const BXOR = 'BXOR';
export const BNOT = 'BNOT';
export const BSHL = 'BSHL';
export const BSHR = 'BSHR';

const VAR = 'VAR', IDENTIFIER = 'IDENTIFIER', NUMBER = 'NUMBER', STRING = 'STRING', FUNCTION = 'FUNCTION', IF = 'IF', ELSE = 'ELSE', RETURN = 'RETURN', CONTINUE = 'CONTINUE', BREAK = 'BREAK',FOR = "for", WHILE = "while", NEW_LINE='NEW_LINE', EOF = 'EOF';
const KEYWORDS = {
// Token type tags for keywords, identifiers, literals, and the
// NEW_LINE / EOF marker tokens.
export const VAR = 'VAR';
export const IDENTIFIER = 'IDENTIFIER';
export const NUMBER = 'NUMBER';
export const STRING = 'STRING';
export const FUNCTION = 'FUNCTION';
export const IF = 'IF';
export const ELSE = 'ELSE';
export const RETURN = 'RETURN';
export const CONTINUE = 'CONTINUE';
export const BREAK = 'BREAK';
// NOTE(review): unlike every other tag, FOR and WHILE hold lowercase values.
// Kept exactly as-is because the values are exported; confirm whether intended.
export const FOR = "for";
export const WHILE = "while";
export const NEW_LINE = 'NEW_LINE';
export const EOF = 'EOF';
export const KEYWORDS = {
var: VAR,
function: FUNCTION,
if: IF,
Expand All @@ -17,92 +17,99 @@ const KEYWORDS = {
while: WHILE,
}

function lex(input) {
/**
 * One lexical token: a type tag paired with the token's text.
 */
export class Token {
  /**
   * @param {string} type - Token type tag (for example ASSIGN or IDENTIFIER).
   * @param {string} value - Text stored alongside the type.
   */
  constructor(type, value) {
    // Copy both fields onto the instance in one step, `type` first.
    Object.assign(this, { type, value });
  }
}

export function lex(input) {
let tokens = []
let position = 0
while (position < input.length) {
switch (input[position]) {
// 有特殊作用的单个字符
case '=':
if (input[position + 1] == '=') {
tokens.push({type: EQ, value: '=='}); position += 2; break;
tokens.push(new Token('EQ', '==')); position += 2; break;
} else {
tokens.push({type: ASSIGN, value: '='}); position++; break;
tokens.push(new Token(ASSIGN, '=')); position++; break;
}
case '(':
tokens.push({type: LPAREN, value: '('}); position++; break;
tokens.push(new Token(LPAREN, '(')); position++; break;
case ')':
tokens.push({type: RPAREN, value: ')'}); position++; break;
tokens.push(new Token(RPAREN, ')')); position++; break;
case '[':
tokens.push({type: LBRACKET, value: '['}); position++; break;
tokens.push(new Token(LBRACKET, '[')); position++; break;
case ']':
tokens.push({type: RBRACKET, value: ']'}); position++; break;
tokens.push(new Token(RBRACKET, ']')); position++; break;
case '{':
tokens.push({type: LBRACE, value: '{'}); position++; break;
tokens.push(new Token(LBRACE, '{')); position++; break;
case '}':
tokens.push({type: RBRACE, value: '}'}); position++; break;
tokens.push(new Token(RBRACE, '}')); position++; break;
case '+':
tokens.push({type: PLUS, value: '+'}); position++; break;
tokens.push(new Token(PLUS, '+')); position++; break;
case '-':
tokens.push({type: MINUS, value: '-'}); position++; break;
tokens.push(new Token(MINUS, '-')); position++; break;
case '*':
tokens.push({type: MULTIPLY, value: '*'}); position++; break;
tokens.push(new Token(MULTIPLY, '*')); position++; break;
case '/':
tokens.push({type: DIVIDE, value: '/'}); position++; break;
tokens.push(new Token(DIVIDE, '/')); position++; break;
case '%':
tokens.push({type: MODULUS, value: '%'}); position++; break;
tokens.push(new Token(MODULUS, '%')); position++; break;
case '.':
tokens.push({type: POINT, value: '.'}); position++; break;
tokens.push(new Token(POINT, '.')); position++; break;
case '^':
tokens.push({type: BXOR, value: '^'}); position++; break;
tokens.push(new Token(BXOR, '^')); position++; break;
case '~':
tokens.push({type: BNOT, value: '~'}); position++; break;
tokens.push(new Token(BNOT, '~')); position++; break;
case '|':
if (input[position + 1] == '|') {
tokens.push({type: OR, value: '||'}); position += 2; break;
tokens.push(new Token(OR, '||')); position += 2; break;
} else {
tokens.push({type: BOR, value: '|'}); position++; break;
tokens.push(new Token(BOR, '|')); position++; break;
}
case '&':
if (input[position + 1] == '&') {
tokens.push({type: AND, value: '&&'}); position += 2; break;
tokens.push(new Token(AND, '&&')); position += 2; break;
} else {
tokens.push({type: BAND, value: '&'}); position++; break;
tokens.push(new Token(BAND, '&')); position++; break;
}
case '!':
if (input[position + 1] == '=') {
tokens.push({type: NEQ, value: '!='}); position += 2; break;
tokens.push(new Token(NEQ, '!=')); position += 2; break;
} else {
tokens.push({type: NOT, value: '!'}); position++; break;
tokens.push(new Token(NOT, '!')); position++; break;
}
case '<':
if (input[position + 1] == '=') {
tokens.push({type: LTE, value: '<='}); position += 2; break;
tokens.push(new Token(LTE, '<=')); position += 2; break;
} else if (input[position + 1] == '<') {
tokens.push({type: BSHL, value: '<<'}); position += 2; break;
tokens.push(new Token(BSHL, '<<')); position += 2; break;
} else {
tokens.push({type: LT, value: '<'}); position++; break;
tokens.push(new Token(LT, '<')); position++; break;
}
case '>':
if (input[position + 1] == '=') {
tokens.push({type: GTE, value: '>='}); position += 2; break;
tokens.push(new Token(GTE, '>=')); position += 2; break;
} else if (input[position + 1] == '>') {
tokens.push({type: BSHR, value: '>>'}); position += 2; break;
tokens.push(new Token(BSHR, '>>')); position += 2; break;
} else {
tokens.push({type: GT, value: '>'}); position++; break;
tokens.push(new Token(GT, '>')); position++; break;
}
case ';':
tokens.push({type: SEMICOLON, value: ';'}); position++; break;
tokens.push(new Token(SEMICOLON, ';')); position++; break;
case ',':
tokens.push({type: COMMA, value: ','}); position++; break;
tokens.push(new Token(COMMA, ',')); position++; break;
// 空格 tab 跳过即可,不需要解析
case ' ':
case '\t':
case '\r':
position++; break;
// 回车这里解析一下,因为想要支持js的弱判断
case '\n':
tokens.push({type: NEW_LINE, value: '\n'}); position++; break;
tokens.push(new Token(NEW_LINE, '\n')); position++; break;
case '\'':
var start = position;
while (true) {
Expand All @@ -111,7 +118,7 @@ function lex(input) {
if (position >= input.length) throw new Error('Unterminated string');
if (input[position] == '\n') throw new Error('Enter is not allowed in string');
if (input[position] == '\'' && input[position - 1] != '\\' ) {
tokens.push({type: STRING, value: input.substring(start, position + 1)});
tokens.push(new Token(STRING, input.substring(start, position + 1)));
position++;
break;
}
Expand All @@ -125,7 +132,7 @@ function lex(input) {
if (position >= input.length) throw new Error('Unterminated string');
if (input[position] == '\n') throw new Error('Enter is not allowed in string');
if (input[position] == '"' && input[position - 1] != '\\' ) {
tokens.push({type: STRING, value: input.substring(start, position + 1)});
tokens.push(new Token(STRING, input.substring(start, position + 1)));
position++;
break;
}
Expand All @@ -140,7 +147,7 @@ function lex(input) {
position++
}
if (start != position) {
tokens.push({type: NUMBER, value: input.substring(start, position)})
tokens.push(new Token(NUMBER, input.substring(start, position)))
break;
}
// 字母类型
Expand All @@ -150,23 +157,14 @@ function lex(input) {
position++
} while (input[position] >= '0' && input[position] <= '9' || input[position] >= 'a' && input[position] <= 'z' || input[position] >= 'A' && input[position] <= 'Z' || input[position] == '_')
let value = input.substring(start, position)
if (KEYWORDS[value]) tokens.push({type: KEYWORDS[value], value})
else tokens.push({type: IDENTIFIER, value: input.substring(start, position)})
if (KEYWORDS[value]) tokens.push(new Token(KEYWORDS[value], value))
else tokens.push(new Token(IDENTIFIER, value))
break;
}
// 不认识的字符抛出异常
throw new Error('unexpected input');
}
}
tokens.push({type: EOF, value: ''})
tokens.push(new Token(EOF, ''))
return tokens
}

let input = `var x = 10;
var z = 'ff\\'f';
function add(x, y) {
return x + y;
}`


console.log(lex(input))
Loading

0 comments on commit 7f8b63b

Please sign in to comment.