From 564aabc6c4d3609c4da437cace1c7ddf8d259080 Mon Sep 17 00:00:00 2001 From: frank Date: Wed, 25 Dec 2024 23:38:46 +0800 Subject: [PATCH] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20UPDATE=20=E8=AF=AD?= =?UTF-8?q?=E6=B3=95=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 24.11/parser_test10.mjs | 423 +++++++++++++++ 24.11/parser_test11.mjs | 501 ++++++++++++++++++ 24.11/parser_test2.mjs | 6 +- 24.11/parser_test3.mjs | 6 +- 24.11/parser_test4.mjs | 6 +- 24.11/parser_test5.mjs | 6 +- 24.11/parser_test6.mjs | 41 +- 24.11/parser_test7.mjs | 53 +- 24.11/parser_test8.mjs | 102 ++-- 24.11/parser_test9.mjs | 98 +--- ...55\346\263\225\345\210\206\346\236\220.md" | 278 ++++++---- 11 files changed, 1198 insertions(+), 322 deletions(-) create mode 100644 24.11/parser_test10.mjs create mode 100644 24.11/parser_test11.mjs diff --git a/24.11/parser_test10.mjs b/24.11/parser_test10.mjs new file mode 100644 index 00000000..a21d64f1 --- /dev/null +++ b/24.11/parser_test10.mjs @@ -0,0 +1,423 @@ +import * as LEX from "./lex.mjs"; +import { lex } from './lex.mjs'; + +class Sentence { + constructor(type) { + if (type) { + this.type = type.toUpperCase() + "_SENTENCE"; + } + } +} +class VarSentence extends Sentence { + constructor(name, value) { + super("VAR"); + this.name = name; // name本身其实也是个表达式 + this.value = value; // 这里的value是个表达式 + } + + toString() { + return `var ${this.name} = ${this.value.toString()};`; + } +} + +class ReturnSentence extends Sentence { + constructor(value) { + super("RETURN"); + this.value = value; // 这里的value也是表达式 + } + toString() { + return `return ${this.value.toString()};`; + } +} + +class BlockSentence extends Sentence { + constructor(sentences) { + super("BLOCK"); + this.sentences = sentences; + } + toString() { + return `{ + ${this.sentences.map(it=>it.toString()).join('\n')} +}` + } +} + +class ExpressionStatement extends Sentence { + constructor(expression) { + super("EXPRESSION"); + this.expression = 
expression; // 这里的expression也是表达式 + } + + toString() { + return this.expression.toString() + ";"; + } +} +// 基础类型 +class AstNode { +} +// 数字字面量 +class NumberAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + + toString() { + return this.token.value; + } +} +// 变量名/函数名字面量 +class IdentifierAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + + toString() { + return this.token.value; + } +} +// null字面量 +class NullAstNode extends AstNode { + toString() { + return "null"; + } +} + +// 字符串字面量 +class StringAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + toString() { + return this.token.value; + } +} +// boolean字面量 +class BooleanAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + toString() { + return this.token.value; + } +} +// 中缀操作符节点 +class InfixOperatorAstNode extends AstNode { + constructor(token) { + super(); + this.op = token; + this.left = null; + this.right = null; + this.precedence = precedenceMap[token.value]; + } + toString() { + return `(${this.left.toString()} ${this.op.value} ${this.right.toString()})`; + } +} +// 前缀操作符 +class PrefixOperatorAstNode extends AstNode { + constructor(token, right) { + super(false); + this.op = token; + this.right = right; + } + toString() { + return `(${this.op.value} ${this.right.toString()})`; + } +} +// 后缀操作符 +class PostfixOperatorAstNode extends AstNode { + constructor(token, left) { + super(false); + this.op = token; + this.left = left; + } + toString() { + return `(${this.left.toString()} ${this.op.value})`; + } +} +// 函数声明 +class FunctionDeclarationAstNode extends AstNode { + constructor(params, body) { + super(); + this.params = params; + this.body = body; + } + toString() { + return `function(${this.params.join(',')})${this.body.toString()}`; + } +} +// 函数调用 +class FunctionCallAstNode extends AstNode { + constructor(caller, args) { + super(); + this.caller = caller; + 
this.args = args; + } + toString() { + return `${this.caller.toString()}(${this.args.map(it=>it.toString()).join(',')})` + } +} +// 分组节点 +class GroupAstNode extends AstNode { + constructor(exp) { + super(); + this.exp = exp; + } + toString() { + // 因为小括号已经在运算符的toString中使用了,这里为了更好的凸显使用中文中括号 + return `【${this.exp.toString()}】` + } +} + + + + + + + + + + + + + +const precedenceMap = { + '=': 10, + '||': 11, '&&': 12, '^': 13, + '==': 14, '!=': 14, + '<': 15, '<=': 15, '>': 15, '>=': 15, + '<<': 16, '>>': 16, '>>>': 16, + '+': 17, '-': 17, + '*': 18, '/': 18, '%': 18, +} +const prefixPrecedenceMap = { + '-': 100, + '!': 100, + '~': 100, + '+': 100, + '++': 100, + '--': 100 +} +const postfixPrecedenceMap = { + '++': 200, + '--': 200 +} + +class Parser { + constructor(tokens) { + this.tokens = tokens; + this.cursor = 0; + } + // 语法解析,把tokens转换为sentences + parse() { + var tokens = this.tokens; + var sentences = []; + for (;;) { + var token = tokens[this.cursor]; + var sentence = null; + if (token.type === LEX.SEMICOLON) { + this.cursor++; + continue; + } else if (token.type === LEX.EOF || token.type === LEX.RBRACE) { + break; + } if (token.type === LEX.VAR) { + sentence = this.parseVarSentence(); + } else if (token.type === LEX.RETURN) { + sentence = this.parseReturnSentence(); + } else if (token.type === LEX.LBRACE) { + sentence = this.parseBlockSentence(); + } else { + sentence = this.parseExpressionStatement(); + } + sentences.push(sentence); + } + return sentences; + } + + + parseVarSentence() { + var tokens = this.tokens; + assert (tokens[this.cursor++].type === LEX.VAR); + assert (tokens[this.cursor].type === LEX.IDENTIFIER); + var name = new IdentifierAstNode(tokens[this.cursor ++]); + assert (tokens[this.cursor++].type === LEX.ASSIGN); + var value = this.parseExpression(); + return new VarSentence(name, value); + } + + // 与var语句类似 + parseReturnSentence() { + var tokens = this.tokens; + assert (tokens[this.cursor++].type === LEX.RETURN); + var value = 
this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ReturnSentence(value); + } + // 转换为表达式语句 + parseExpressionStatement() { + var tokens = this.tokens; + for (var j = this.cursor; j < tokens.length; j++) { + if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { + var expression = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + this.cursor ++; + return new ExpressionStatement(expression); + } + } + } + // 转换为块语句,块语句中包含一个语句数组 + parseBlockSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result + } + + // 然后修改parseExpression函数,使其接受一个参数,代表前置符号的优先级 + parseExpression(precedence = 0) { + var tokens = this.tokens; + var stack = []; + var mid = null; + while (true) { + // 此时栈为空的时候默认看到的就是上下文传进来的优先级 + var stackTopPrecedence = stack.length == 0 ? precedence: stack[stack.length - 1].precedence; + mid = mid == null ? this.nextUnaryNode() : mid; + var opNode = this.getEofOrInfixNode(tokens, this.cursor); + // 结束循环的条件改为,当前操作符优先级<=上下文优先级 并且 栈为空 + // 这样首先是能兼容为0的情况,其次前缀操作符优先级是比中缀高的,所以前缀操作符传进来的时候一定是遇到中缀就结束 + if (opNode.precedence <= precedence && stackTopPrecedence == precedence) return mid; + if (opNode.op.value == '=' ? 
opNode.precedence < stackTopPrecedence : opNode.precedence <= stackTopPrecedence) { + var top = stack.pop(); + top.right = mid; + mid = top; + } + else { + opNode.left = mid; + stack.push(opNode); + this.cursor++; + mid = null; + } + } + } + + nextUnaryNode() { + var tokens = this.tokens; + var node = null; + switch (tokens[this.cursor].type) { + case LEX.NUMBER: + node = new NumberAstNode(tokens[this.cursor++]); + break; + case LEX.STRING: + node = new StringAstNode(tokens[this.cursor++]); + break; + case LEX.BOOLEAN: + node = new BooleanAstNode(tokens[this.cursor++]); + break; + case LEX.NULL: + node = new NullAstNode(tokens[this.cursor++]); + break; + // 遇到前缀运算符 + case LEX.PLUS: + case LEX.MINUS: + case LEX.INCREMENT: + case LEX.DECREMENT: + case LEX.NOT: + case LEX.BIT_NOT: + // 使用parseExpression函数递归,但是要传递当前符号的优先级 + node = new PrefixOperatorAstNode(tokens[this.cursor], this.parseExpression(prefixPrecedenceMap[tokens[this.cursor++].value])); + break; + // 分组 + case LEX.LPAREN: + // 递归解析(后面的即可,因为遇到)的时候,parseExpression无法识别,就会结束解析 + this.cursor++; + // GroupAstNode其实可有可无 + node = new GroupAstNode(this.parseExpression()); + assert(tokens[this.cursor++].type == LEX.RPAREN, "group not closed"); + break; + case LEX.IDENTIFIER: + node = new IdentifierAstNode(tokens[this.cursor++]); + // 函数调用 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; + case LEX.FUNCTION: + assert(tokens[++this.cursor].type == LEX.LPAREN, "function need a lparen"); + this.cursor++; + var params = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + assert(tokens[this.cursor].type == LEX.IDENTIFIER); + params.push(new IdentifierAstNode(tokens[this.cursor++])); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + 
} + this.cursor++; + var body = this.parseBlockSentence(); + node = new FunctionDeclarationAstNode(params, body) + // 函数声明直接调用,与变量的代码一模一样 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; + default: + throw new Error('unexpected token in nextUnary: ' + tokens[this.cursor].type); + } + while (tokens[this.cursor].type == LEX.INCREMENT || tokens[this.cursor].type == LEX.DECREMENT) { + assert(node instanceof IdentifierAstNode, "INCREMENT/DECREMENT can only be used with identifier"); + node = new PostfixOperatorAstNode(tokens[this.cursor++], node); + } + return node; + } + getEofOrInfixNode(tokens, index) { + var eof = new InfixOperatorAstNode('EOF'); + eof.precedence = 0; + if (index >= tokens.length) return eof + var token = tokens[index]; + if (precedenceMap[token.value] == null) { + return eof; + } + return new InfixOperatorAstNode(tokens[index]); + } + +} + +function assert(condition) { + if (!condition) { + throw new Error("assert failed"); + } +} + + +var code = `var add = function(a, b ) {return a+b;}(1 + a * 3,2)();`; +var code = `var a = b = c = 1`; + +var tokens = lex(code); +var sentences = new Parser(tokens).parse() + +for (var i = 0; i < sentences.length; i++) { + console.log(sentences[i].toString()); +} \ No newline at end of file diff --git a/24.11/parser_test11.mjs b/24.11/parser_test11.mjs new file mode 100644 index 00000000..ab83b802 --- /dev/null +++ b/24.11/parser_test11.mjs @@ -0,0 +1,501 @@ +import * as LEX from "./lex.mjs"; +import { lex } from './lex.mjs'; + +class Sentence { + constructor(type) { + if (type) { + this.type = type.toUpperCase() + "_SENTENCE"; + } + } +} +class EmptySentence extends Sentence { + constructor() { + super("EMPTY"); + } + toString() { + return ""; + } 
+} +class VarSentence extends Sentence { + constructor(name, value) { + super("VAR"); + this.name = name; // name本身其实也是个表达式 + this.value = value; // 这里的value是个表达式 + } + + toString() { + return `var ${this.name} = ${this.value.toString()};`; + } +} + +class ReturnSentence extends Sentence { + constructor(value) { + super("RETURN"); + this.value = value; // 这里的value也是表达式 + } + toString() { + return `return ${this.value.toString()};`; + } +} + +class BlockSentence extends Sentence { + constructor(sentences) { + super("BLOCK"); + this.sentences = sentences; + } + toString() { + return `{ + ${this.sentences.map(it=>it.toString()).join('\n')} +}` + } +} + +class IfSentence extends Sentence { + constructor(condition, ifBody, elseBody) { + super("IF"); + this.condition = condition; + this.ifBody = ifBody; + this.elseBody = elseBody; + } + toString() { + return `if ${this.condition.toString()} ${this.ifBody.toString()} else ${this.elseBody.toString()} +` + } +} + +class ForSentence extends Sentence { + constructor(init, condition, step, body) { + super("FOR"); + this.init = init; + this.condition = condition; + this.step = step; + this.body = body; + } + toString() { + return `for(${this.init.toString()} ${this.condition.toString()} ${this.step.toString()})${this.body.toString()}` + } +} +class BreakSentence extends Sentence { constructor () { super("BREAK");}} +class ContinueSentence extends Sentence { constructor () { super("CONTINUE");}} + + +class ExpressionStatement extends Sentence { + constructor(expression) { + super("EXPRESSION"); + this.expression = expression; // 这里的expression也是表达式 + } + + toString() { + return this.expression.toString() + ";"; + } +} +// 基础类型 +class AstNode { +} +// 数字字面量 +class NumberAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + + toString() { + return this.token.value; + } +} +// 变量名/函数名字面量 +class IdentifierAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + + 
toString() { + return this.token.value; + } +} +// null字面量 +class NullAstNode extends AstNode { + toString() { + return "null"; + } +} + +// 字符串字面量 +class StringAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + toString() { + return this.token.value; + } +} +// boolean字面量 +class BooleanAstNode extends AstNode { + constructor(token) { + super(); + this.token = token; + } + toString() { + return this.token.value; + } +} +// 中缀操作符节点 +class InfixOperatorAstNode extends AstNode { + constructor(token) { + super(); + this.op = token; + this.left = null; + this.right = null; + this.precedence = precedenceMap[token.value]; + } + toString() { + return `(${this.left.toString()} ${this.op.value} ${this.right.toString()})`; + } +} +// 前缀操作符 +class PrefixOperatorAstNode extends AstNode { + constructor(token, right) { + super(false); + this.op = token; + this.right = right; + } + toString() { + return `(${this.op.value} ${this.right.toString()})`; + } +} +// 后缀操作符 +class PostfixOperatorAstNode extends AstNode { + constructor(token, left) { + super(false); + this.op = token; + this.left = left; + } + toString() { + return `(${this.left.toString()} ${this.op.value})`; + } +} +// 函数声明 +class FunctionDeclarationAstNode extends AstNode { + constructor(params, body) { + super(); + this.params = params; + this.body = body; + } + toString() { + return `function(${this.params.join(',')})${this.body.toString()}`; + } +} +// 函数调用 +class FunctionCallAstNode extends AstNode { + constructor(caller, args) { + super(); + this.caller = caller; + this.args = args; + } + toString() { + return `${this.caller.toString()}(${this.args.map(it=>it.toString()).join(',')})` + } +} +// 分组节点 +class GroupAstNode extends AstNode { + constructor(exp) { + super(); + this.exp = exp; + } + toString() { + // 因为小括号已经在运算符的toString中使用了,这里为了更好的凸显使用中文中括号 + return `【${this.exp.toString()}】` + } +} + + + + + + + + + + + + + +const precedenceMap = { + '=': 10, + '||': 11, '&&': 12, 
'^': 13, + '==': 14, '!=': 14, + '<': 15, '<=': 15, '>': 15, '>=': 15, + '<<': 16, '>>': 16, '>>>': 16, + '+': 17, '-': 17, + '*': 18, '/': 18, '%': 18, +} +const prefixPrecedenceMap = { + '-': 100, + '!': 100, + '~': 100, + '+': 100, + '++': 100, + '--': 100 +} +const postfixPrecedenceMap = { + '++': 200, + '--': 200 +} + +class Parser { + constructor(tokens) { + this.tokens = tokens; + this.cursor = 0; + } + // 语法解析,把tokens转换为sentences + parse() { + var sentences = []; + for (;;) { + var item = this.parseSentence(); + if (item == null) break; + if (item instanceof EmptySentence) { + continue; + } + sentences.push(item); + } + return sentences; + } + parseSentence() { + var token = tokens[this.cursor]; + if (token.type === LEX.SEMICOLON) { + this.cursor++; + return new EmptySentence(); + } else if (token.type === LEX.EOF || token.type === LEX.RBRACE || token.type === LEX.RPAREN) { + return null; + } if (token.type === LEX.VAR) { + return this.parseVarSentence(); + } else if (token.type === LEX.RETURN) { + return this.parseReturnSentence(); + } else if (token.type === LEX.LBRACE) { + return this.parseBlockSentence(); + } else if (token.type === LEX.IF) { + return this.parseIfSentence(); + } else if (token.type === LEX.FOR) { + return this.parseForSentence(); + } else if (token.type === LEX.BREAK) { + return new BreakSentence(); + } else if (token.type === LEX.CONTINUE) { + return new ContinueSentence(); + } else { + return this.parseExpressionStatement(); + } + } + + // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 + parseVarSentence() { + var tokens = this.tokens; + assert (tokens[this.cursor++].type === LEX.VAR); + assert (tokens[this.cursor].type === LEX.IDENTIFIER); + var name = new IdentifierAstNode(tokens[this.cursor++]); + assert (tokens[this.cursor++].type === LEX.ASSIGN); + var value = this.parseExpression(); + return new VarSentence(name, value); + } + + // 与var语句类似 + parseReturnSentence() { + var tokens = this.tokens; + assert 
(tokens[this.cursor++].type === LEX.RETURN); + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ReturnSentence(value); + } + + // 转换为表达式语句 + parseExpressionStatement() { + var tokens = this.tokens; + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ExpressionStatement(value); + } + // 转换为块语句,块语句中包含一个语句数组 + parseBlockSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result + } + + parseIfSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type == LEX.IF, "if sentence need a if"); // if + assert(tokens[this.cursor++].type == LEX.LPAREN, "if sentence need a LPAREN follow if"); // ( + var condition = this.parseExpression(); // condition + assert(tokens[this.cursor++].type == LEX.RPAREN, "if sentence need a RPAREN follow condition");// ) + var ifBody = this.parseBlockSentence(); // {xxx} + if (tokens[this.cursor].type == LEX.ELSE) { + this.cursor++; // else + var elseBody = this.parseBlockSentence(); // {yyy} + } + return new IfSentence(condition, ifBody, elseBody); + } + + parseForSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type == LEX.FOR, "for sentence need a for"); + assert(tokens[this.cursor++].type == LEX.LPAREN, "for sentence need a LPAREN follow for"); + var init = this.parseSentence(); + assert(tokens[this.cursor++].type == LEX.SEMICOLON, "for sentence error need a SEMICOLON after init"); + var condition = this.parseSentence(); + assert(tokens[this.cursor++].type == LEX.SEMICOLON, "for sentence error need a SEMICOLON after condition"); + var step = this.parseExpression(); + assert(tokens[this.cursor++].type 
== LEX.RPAREN, "for sentence need a RPAREN follow condition"); + var body = this.parseBlockSentence(); + return new ForSentence(init, condition, step, body); + } + + // 然后修改parseExpression函数,使其接受一个参数,代表前置符号的优先级 + parseExpression(precedence = 0) { + var tokens = this.tokens; + var stack = []; + var mid = null; + while (true) { + // 此时栈为空的时候默认看到的就是上下文传进来的优先级 + var stackTopPrecedence = stack.length == 0 ? precedence: stack[stack.length - 1].precedence; + mid = mid == null ? this.nextUnaryNode() : mid; + var opNode = this.getEofOrInfixNode(tokens, this.cursor); + // 结束循环的条件改为,当前操作符优先级<=上下文优先级 并且 栈为空 + // 这样首先是能兼容为0的情况,其次前缀操作符优先级是比中缀高的,所以前缀操作符传进来的时候一定是遇到中缀就结束 + if (opNode.precedence <= precedence && stackTopPrecedence == precedence) return mid; + if (opNode.op.value == '=' ? opNode.precedence < stackTopPrecedence : opNode.precedence <= stackTopPrecedence) { + var top = stack.pop(); + top.right = mid; + mid = top; + } + else { + opNode.left = mid; + stack.push(opNode); + this.cursor++; + mid = null; + } + } + } + + nextUnaryNode() { + var tokens = this.tokens; + var node = null; + switch (tokens[this.cursor].type) { + case LEX.NUMBER: + node = new NumberAstNode(tokens[this.cursor++]); + break; + case LEX.STRING: + node = new StringAstNode(tokens[this.cursor++]); + break; + case LEX.BOOLEAN: + node = new BooleanAstNode(tokens[this.cursor++]); + break; + case LEX.NULL: + node = new NullAstNode(tokens[this.cursor++]); + break; + // 遇到前缀运算符 + case LEX.PLUS: + case LEX.MINUS: + case LEX.INCREMENT: + case LEX.DECREMENT: + case LEX.NOT: + case LEX.BIT_NOT: + // 使用parseExpression函数递归,但是要传递当前符号的优先级 + node = new PrefixOperatorAstNode(tokens[this.cursor], this.parseExpression(prefixPrecedenceMap[tokens[this.cursor++].value])); + break; + // 分组 + case LEX.LPAREN: + // 递归解析(后面的即可,因为遇到)的时候,parseExpression无法识别,就会结束解析 + this.cursor++; + // GroupAstNode其实可有可无 + node = new GroupAstNode(this.parseExpression()); + assert(tokens[this.cursor++].type == LEX.RPAREN, "group not closed"); + 
break; + case LEX.IDENTIFIER: + node = new IdentifierAstNode(tokens[this.cursor++]); + // 函数调用 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; + case LEX.FUNCTION: + assert(tokens[++this.cursor].type == LEX.LPAREN, "function need a lparen"); + this.cursor++; + var params = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + assert(tokens[this.cursor].type == LEX.IDENTIFIER); + params.push(new IdentifierAstNode(tokens[this.cursor++])); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + var body = this.parseBlockSentence(); + node = new FunctionDeclarationAstNode(params, body) + // 函数声明直接调用,与变量的代码一模一样 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; + default: + throw new Error('unexpected token in nextUnary: ' + tokens[this.cursor].type); + } + while (tokens[this.cursor].type == LEX.INCREMENT || tokens[this.cursor].type == LEX.DECREMENT) { + assert(node instanceof IdentifierAstNode, "INCREMENT/DECREMENT can only be used with identifier"); + node = new PostfixOperatorAstNode(tokens[this.cursor++], node); + } + return node; + } + getEofOrInfixNode(tokens, index) { + var eof = new InfixOperatorAstNode('EOF'); + eof.precedence = 0; + if (index >= tokens.length) return eof + var token = tokens[index]; + if (precedenceMap[token.value] == null) { + return eof; + } + return new InfixOperatorAstNode(tokens[index]); + } + +} + +function assert(condition) { + if (!condition) { + throw new 
Error("assert failed"); + } +} + + +var code = `var add = function(a, b ) {return a+b;}(1 + a * 3,2)();`; +var code = `var a = b = c = 1;`; +var code = `if (10 > 1) {print("ten");} else {print("not ten");}`; +var code = `for (var i = 0; i < 10; i++) { print(i); }`; + + + +var tokens = lex(code); +var sentences = new Parser(tokens).parse() + +for (var i = 0; i < sentences.length; i++) { + console.log(sentences[i].toString()); +} \ No newline at end of file diff --git a/24.11/parser_test2.mjs b/24.11/parser_test2.mjs index 47828b5d..664ed37b 100644 --- a/24.11/parser_test2.mjs +++ b/24.11/parser_test2.mjs @@ -16,7 +16,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -26,7 +26,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -49,7 +49,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } diff --git a/24.11/parser_test3.mjs b/24.11/parser_test3.mjs index 52f22b90..105b54e1 100644 --- a/24.11/parser_test3.mjs +++ b/24.11/parser_test3.mjs @@ -17,7 +17,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -27,7 +27,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -50,7 +50,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } ///////////////////// 注意 AstNode新增了full属性 diff --git a/24.11/parser_test4.mjs b/24.11/parser_test4.mjs index 88f5740d..55aaa7a3 
100644 --- a/24.11/parser_test4.mjs +++ b/24.11/parser_test4.mjs @@ -17,7 +17,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -27,7 +27,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -50,7 +50,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } class AstNode { diff --git a/24.11/parser_test5.mjs b/24.11/parser_test5.mjs index b0feac68..e5c8eb66 100644 --- a/24.11/parser_test5.mjs +++ b/24.11/parser_test5.mjs @@ -17,7 +17,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -27,7 +27,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -50,7 +50,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() +";"; } } class AstNode { diff --git a/24.11/parser_test6.mjs b/24.11/parser_test6.mjs index ee341cc5..9258b527 100644 --- a/24.11/parser_test6.mjs +++ b/24.11/parser_test6.mjs @@ -16,7 +16,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -26,7 +26,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -49,7 +49,7 @@ class ExpressionStatement extends Sentence { } toString() { - return 
this.expression.toString(); + return this.expression.toString() + ";"; } } // 基础类型 @@ -139,40 +139,7 @@ class PostfixOperatorAstNode extends AstNode { return `(${this.left.toString()} ${this.op.value})`; } } -// 函数声明 -class FunctionDeclarationAstNode extends AstNode { - constructor(nameToken, params, body) { - super(); - this.name = name == null ? null :new IdentifierAstNode(nameToken); - this.params = params; - this.body = body; - } - toString() { - return `function${this.name ? ' ' + this.name.toString() : ''}(${this.params.join(',')})${this.body.map(it=>it.toString()).join('\n')}`; - } -} -// 函数调用 -class FunctionCallAstNode extends AstNode { - constructor(nameToken, args) { - super(); - this.name = new IdentifierAstNode(nameToken); - this.args = args; // args是ast数组 - } - toString() { - return `${this.name.toString()}(${this.args.map(it=>it.toString()).join(',')})` - } -} -// 分组节点 -class GroupAstNode extends AstNode { - constructor(exp) { - super(); - this.exp = exp; - } - toString() { - // 因为小括号已经在运算符的toString中使用了,这里为了更好的凸显使用中文中括号 - return `【${this.exp.toString()}】` - } -} + // 语法解析,把tokens转换为sentences function parse(tokens) { // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 diff --git a/24.11/parser_test7.mjs b/24.11/parser_test7.mjs index 7b6ef52b..057500ae 100644 --- a/24.11/parser_test7.mjs +++ b/24.11/parser_test7.mjs @@ -16,7 +16,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -26,7 +26,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -49,7 +49,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } // 基础类型 @@ -139,40 +139,6 @@ class PostfixOperatorAstNode extends AstNode { return 
`(${this.left.toString()} ${this.op.value})`; } } -// 函数声明 -class FunctionDeclarationAstNode extends AstNode { - constructor(nameToken, params, body) { - super(); - this.name = name == null ? null :new IdentifierAstNode(nameToken); - this.params = params; - this.body = body; - } - toString() { - return `function${this.name ? ' ' + this.name.toString() : ''}(${this.params.join(',')})${this.body.map(it=>it.toString()).join('\n')}`; - } -} -// 函数调用 -class FunctionCallAstNode extends AstNode { - constructor(nameToken, args) { - super(); - this.name = new IdentifierAstNode(nameToken); - this.args = args; // args是ast数组 - } - toString() { - return `${this.name.toString()}(${this.args.map(it=>it.toString()).join(',')})` - } -} -// 分组节点 -class GroupAstNode extends AstNode { - constructor(exp) { - super(); - this.exp = exp; - } - toString() { - // 因为小括号已经在运算符的toString中使用了,这里为了更好的凸显使用中文中括号 - return `【${this.exp.toString()}】` - } -} @@ -295,15 +261,10 @@ class Parser { // 转换为块语句,块语句中包含一个语句数组 parseBlockSentence() { var tokens = this.tokens; - var braceCount = 0; - for (var j = this.cursor; j < tokens.length; j++) { - if (tokens[j].type == LEX.LBRACE) braceCount++; - if (tokens[j].type == LEX.RBRACE) braceCount--; - if (braceCount == 0) { - return new BlockSentence(parse(tokens.slice(this.cursor + 1, this.cursor = j))); - } - } - throw new Error("brace not close for block sentence") + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result } // 表达式解析,解析下一个表达式,遇到无法识别的字符会结束 diff --git a/24.11/parser_test8.mjs b/24.11/parser_test8.mjs index f12b6bf6..d06d4d03 100644 --- a/24.11/parser_test8.mjs +++ b/24.11/parser_test8.mjs @@ -16,7 +16,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = 
${this.value.toString()};`; } } @@ -26,7 +26,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -49,7 +49,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } // 基础类型 @@ -141,25 +141,24 @@ class PostfixOperatorAstNode extends AstNode { } // 函数声明 class FunctionDeclarationAstNode extends AstNode { - constructor(nameToken, params, body) { + constructor(params, body) { super(); - this.name = name == null ? null :new IdentifierAstNode(nameToken); this.params = params; this.body = body; } toString() { - return `function${this.name ? ' ' + this.name.toString() : ''}(${this.params.join(',')})${this.body.map(it=>it.toString()).join('\n')}`; + return `function(${this.params.join(',')})${this.body.toString()}`; } } // 函数调用 class FunctionCallAstNode extends AstNode { - constructor(nameToken, args) { + constructor(caller, args) { super(); - this.name = new IdentifierAstNode(nameToken); - this.args = args; // args是ast数组 + this.caller = caller; + this.args = args; } toString() { - return `${this.name.toString()}(${this.args.map(it=>it.toString()).join(',')})` + return `${this.caller.toString()}(${this.args.map(it=>it.toString()).join(',')})` } } // 分组节点 @@ -218,6 +217,7 @@ class Parser { var token = tokens[this.cursor]; var sentence = null; if (token.type === LEX.SEMICOLON) { + this.cursor++; continue; } else if (token.type === LEX.EOF) { break; @@ -235,75 +235,47 @@ class Parser { return sentences; } - // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 parseVarSentence() { var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.VAR); - assert (tokens[this.cursor + 1].type === LEX.IDENTIFIER); - assert (tokens[this.cursor + 2].type === LEX.ASSIGN); - var name = new IdentifierAstNode(tokens[this.cursor + 1]); - for (var 
j = this.cursor + 3; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - var value = this.parseExpression(this.cursor = this.cursor + 3); - return new VarSentence(name, value); - } - } - } - // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 - parseVarSentence() { - var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.VAR); - assert (tokens[this.cursor + 1].type === LEX.IDENTIFIER); - assert (tokens[this.cursor + 2].type === LEX.ASSIGN); - var name = new IdentifierAstNode(tokens[this.cursor + 1]); - for (var j = this.cursor + 3; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - this.cursor = this.cursor + 3 - var value = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); - this.cursor ++; - return new VarSentence(name, value); - } - } + assert (tokens[this.cursor++].type === LEX.VAR); + assert (tokens[this.cursor].type === LEX.IDENTIFIER); + var name = new IdentifierAstNode(tokens[this.cursor++]); + assert (tokens[this.cursor++].type === LEX.ASSIGN); + var value = this.parseExpression(); + return new VarSentence(name, value); } + // 与var语句类似 parseReturnSentence() { var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.RETURN); - for (var j = this.cursor + 1; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - this.cursor += 1; - var value = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); - this.cursor ++; - return new ReturnSentence(value); - } - } + assert (tokens[this.cursor++].type === LEX.RETURN); + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ReturnSentence(value); } + // 转换为表达式语句 parseExpressionStatement() { var tokens = this.tokens; - for (var 
j = this.cursor; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - var expression = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); - this.cursor ++; - return new ExpressionStatement(expression); - } - } + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ExpressionStatement(value); } // 转换为块语句,块语句中包含一个语句数组 parseBlockSentence() { var tokens = this.tokens; - var braceCount = 0; - for (var j = this.cursor; j < tokens.length; j++) { - if (tokens[j].type == LEX.LBRACE) braceCount++; - if (tokens[j].type == LEX.RBRACE) braceCount--; - if (braceCount == 0) { - return new BlockSentence(parse(tokens.slice(this.cursor + 1, this.cursor = j))); - } - } - throw new Error("brace not close for block sentence") + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result + } + // 转换为块语句,块语句中包含一个语句数组 + parseBlockSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result } // 然后修改parseExpression函数,使其接受一个参数,代表前置符号的优先级 diff --git a/24.11/parser_test9.mjs b/24.11/parser_test9.mjs index 92093a44..48c46cf5 100644 --- a/24.11/parser_test9.mjs +++ b/24.11/parser_test9.mjs @@ -16,7 +16,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return `var ${this.name} = ${this.value.toString()};`; } } @@ -26,7 +26,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return 
${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -49,7 +49,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } // 基础类型 @@ -143,25 +143,24 @@ class PostfixOperatorAstNode extends AstNode { } // 函数声明 class FunctionDeclarationAstNode extends AstNode { - constructor(nameToken, params, body) { + constructor(params, body) { super(); - this.name = name == null ? null :new IdentifierAstNode(nameToken); this.params = params; this.body = body; } toString() { - return `function${this.name ? ' ' + this.name.toString() : ''}(${this.params.join(',')})${this.body.map(it=>it.toString()).join('\n')}`; + return `function(${this.params.join(',')})${this.body.toString()}`; } } // 函数调用 class FunctionCallAstNode extends AstNode { - constructor(nameToken, args) { + constructor(caller, args) { super(); - this.name = new IdentifierAstNode(nameToken); - this.args = args; // args是ast数组 + this.caller = caller; + this.args = args; } toString() { - return `${this.name.toString()}(${this.args.map(it=>it.toString()).join(',')})` + return `${this.caller.toString()}(${this.args.map(it=>it.toString()).join(',')})` } } // 分组节点 @@ -185,9 +184,6 @@ class GroupAstNode extends AstNode { - - - const precedenceMap = { '+': 1, '-': 1, @@ -220,6 +216,7 @@ class Parser { var token = tokens[this.cursor]; var sentence = null; if (token.type === LEX.SEMICOLON) { + this.cursor++; continue; } else if (token.type === LEX.EOF) { break; @@ -236,76 +233,39 @@ class Parser { } return sentences; } - - // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 parseVarSentence() { var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.VAR); - assert (tokens[this.cursor + 1].type === LEX.IDENTIFIER); - assert (tokens[this.cursor + 2].type === LEX.ASSIGN); - var name = new IdentifierAstNode(tokens[this.cursor + 1]); - for (var j = this.cursor + 3; j < tokens.length; j++) { - if 
(tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - var value = this.parseExpression(this.cursor = this.cursor + 3); - return new VarSentence(name, value); - } - } - } - // 从i开始转换成var语句,校验是不是var xx = xxx;格式,然后需要解析表达式parseExpression函数。 - parseVarSentence() { - var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.VAR); - assert (tokens[this.cursor + 1].type === LEX.IDENTIFIER); - assert (tokens[this.cursor + 2].type === LEX.ASSIGN); - var name = new IdentifierAstNode(tokens[this.cursor + 1]); - for (var j = this.cursor + 3; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - this.cursor = this.cursor + 3 - var value = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); - this.cursor ++; - return new VarSentence(name, value); - } - } + assert (tokens[this.cursor++].type === LEX.VAR); + assert (tokens[this.cursor].type === LEX.IDENTIFIER); + var name = new IdentifierAstNode(tokens[this.cursor++]); + assert (tokens[this.cursor++].type === LEX.ASSIGN); + var value = this.parseExpression(); + return new VarSentence(name, value); } + // 与var语句类似 parseReturnSentence() { var tokens = this.tokens; - assert (tokens[this.cursor].type === LEX.RETURN); - for (var j = this.cursor + 1; j < tokens.length; j++) { - if (tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - this.cursor += 1; - var value = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); - this.cursor ++; - return new ReturnSentence(value); - } - } + assert (tokens[this.cursor++].type === LEX.RETURN); + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ReturnSentence(value); } + // 转换为表达式语句 parseExpressionStatement() { var tokens = this.tokens; - for (var j = this.cursor; j < tokens.length; j++) { - if 
(tokens[j].type === LEX.SEMICOLON || tokens[j].type === LEX.EOF) { - var expression = this.parseExpression(); - assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX>EOF); - this.cursor ++; - return new ExpressionStatement(expression); - } - } + var value = this.parseExpression(); + assert(tokens[this.cursor].type === LEX.SEMICOLON || tokens[this.cursor].type == LEX.EOF); + return new ExpressionStatement(value); } // 转换为块语句,块语句中包含一个语句数组 parseBlockSentence() { var tokens = this.tokens; - var braceCount = 0; - for (var j = this.cursor; j < tokens.length; j++) { - if (tokens[j].type == LEX.LBRACE) braceCount++; - if (tokens[j].type == LEX.RBRACE) braceCount--; - if (braceCount == 0) { - return new BlockSentence(parse(tokens.slice(this.cursor + 1, this.cursor = j))); - } - } - throw new Error("brace not close for block sentence") + assert(tokens[this.cursor++].type === LEX.LBRACE, "brace not open for block sentence") + var result = new BlockSentence(this.parse()); + assert(tokens[this.cursor++].type === LEX.RBRACE, "brace not close for block sentence"); + return result } parseExpression() { diff --git "a/24.12/\350\257\255\346\263\225\345\210\206\346\236\220.md" "b/24.12/\350\257\255\346\263\225\345\210\206\346\236\220.md" index 4de0df51..03ccd9bd 100644 --- "a/24.12/\350\257\255\346\263\225\345\210\206\346\236\220.md" +++ "b/24.12/\350\257\255\346\263\225\345\210\206\346\236\220.md" @@ -33,7 +33,7 @@ return func(1,1); var a = 1; } ``` -表达式在大多数解释型语言中都可以单独作为一个语句例如单独写一个`1 + 1;`,这也是一种语句类型,所以我们需要支持`var语句` `return语句` `块语句` `表达式语句`,暂时不考虑`if/while/for`等流程控制语句,后续会补充。(这里有人会困惑,函数声明不算是语句吗?函数声明是有返回值的表达式,返回当前函数。) +表达式在大多数解释型语言中都可以单独作为一个语句例如单独写一个`1 + 1;`,这也是一种语句类型,所以我们需要支持`var语句` `return语句` `块语句` `表达式语句`,暂时不考虑`if/while/for`等流程控制语句,其实块语句前期也用不到,也可以暂时忽略,后续会补充。(这里有人会困惑,函数声明不算是语句吗?函数声明是有返回值的表达式,返回当前函数。) ```js a + 1; 1 + 2; @@ -213,7 +213,7 @@ class VarSentence extends Sentence { } toString() { - return `var ${this.name} = ${this.value.toString()}`; + return 
`var ${this.name} = ${this.value.toString()};`; } } @@ -223,7 +223,7 @@ class ReturnSentence extends Sentence { this.value = value; // 这里的value也是表达式 } toString() { - return `return ${this.value.toString()}`; + return `return ${this.value.toString()};`; } } @@ -246,7 +246,7 @@ class ExpressionStatement extends Sentence { } toString() { - return this.expression.toString(); + return this.expression.toString() + ";"; } } @@ -383,7 +383,7 @@ for (var i = 0; i < sentences.length; i++) { - 运算符的优先级,例如`*/`法运算符优先级高于`+-`,所以要先执行乘除再执行加减。 - 运算符的结合性,例如`a + b - c`,这里`+`和`-`的结合性是左结合,所以先执行`a + b`,然后再执行`(a + b) - c`,大多数都是做结合,只有赋值符号是右结合`a = b = 1`。 -## 2.1 从四则运算开始 +## 2.1 从四则运算了解中缀表达式 上述表达式的形式有点复杂,我们先来只考虑最简单的正数的四则运算的场景,思考下面的表达式,我们如何解析成上图的树状结构?有人可能想起来`leetcode`的算法题和`逆波兰表达式`了,然后就有了一些痛苦的回忆。但是我们回归零点,自己来思考。 ```js 1 + 2 * 3 / 4 - 5 @@ -703,7 +703,7 @@ function getEofOrInfixNode(tokens, index) { } ``` 这样的单层循环,理解起来是不是比递归要简单多了,其实本质上是一样的,只是用栈模拟了递归的过程,这样对于`pratt`的解析,我们用了多种代码的形式,有了非常深刻的理解。 -## 2.2 其他语法 +## 2.2 其他表达式 上面花了大量篇幅来讲四则运算的语法分析,而实际上表达式除了四则运算符,还有其他的很多形式,正如上面提到的,我们还需要整理代码来适配: - 数字、字符串、布尔值、null、变量,这些单个值就是表达式,比如`1`、`"hello"`、`true`、`null`、`a`等。 - 前缀操作符 + 另一个表达式形成新的表达式,例如`-1`、`!true`等。 @@ -1252,109 +1252,201 @@ const precedenceMap = { '*': 18, '/': 18, '%': 18, } ``` -### 2.2.5 函数调用 - - -### 2.2.6 函数声明 +这里面有一个符号比较特殊就是`=`,他是右结合的,其他都是左结合,结合就是指相同优先级的符号出现2次,先计算左边还是右边,正常的其他符号例如`1 + 2 + 3`是左结合的`(1+2) + 3`,而`a = b = 1`是右侧结合`a = (b = 1)`。 +只需要改`parseExpression`中一行代码即可: +```js +// 如果是等号,那么是右结合,<= 换成 < +if (opNode.op.value == '=' ? opNode.precedence < stackTopPrecedence : opNode.precedence <= stackTopPrecedence) +``` +### 2.2.5 函数声明 +函数声明,`function name(a,b,c){xxx}`或者匿名函数赋值给变量`var name = function(a,b,c){xxx}`,因为效果一样,为了简化流程,我们只支持第二种形式,因为这种形式比较简单,函数声明就只是一个表达式,返回值就是函数本身。 -- todo if语句 for语句 while语句的支持。 +在`nextUnaryNode`函数中,识别`function`关键字,然后是`(0或多个参数)`,然后是`blockSentence`即可。 +```js +....
+ case LEX.FUNCTION: + // function后跟左括号 + assert(tokens[++this.cursor].type == LEX.LPAREN, "function need a lparen"); + this.cursor++; + // 然后是空参数或者多个参数用逗号隔开 + var params = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + assert(tokens[this.cursor].type == LEX.IDENTIFIER); + params.push(new IdentifierAstNode(tokens[this.cursor++])); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + if (tokens[this.cursor].type == LEX.RPAREN) { + this.cursor++; + break; + } + } + // 接下来是个块语句 {xxx} + var body = this.parseBlockSentence(); + node = new FunctionDeclarationAstNode(params, body) + break; +.... +``` +### 2.2.6 函数调用 +函数调用可以通过函数名`add(1,2)`,也可以通过函数声明的表达式直接调用`function(a,b){return a+b;}(1,2)`,还有可能是一个函数的返回值也是函数,即在前面形式之后调用`add(a,b)(a,b)`,而调用时候的每个入参都是一个表达式。对于函数名调用,则是在`nextUnaryNode`函数中识别到`IDENTIFIER`,然后识别到`LPAREN`,就认为是函数调用,接下来分别识别多个表达式即可,因为遇到逗号这个`parseExpression`中的符号无法处理,就自动结束了,直到遇到`RPAREN`才结束。而如果是函数声明也是类似的。 +```js +.... + case LEX.IDENTIFIER: + node = new IdentifierAstNode(tokens[this.cursor++]); + // 函数调用 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; + case LEX.FUNCTION: + assert(tokens[++this.cursor].type == LEX.LPAREN, "function need a lparen"); + this.cursor++; + var params = []; + while (tokens[this.cursor].type != LEX.RPAREN) { + assert(tokens[this.cursor].type == LEX.IDENTIFIER); + params.push(new IdentifierAstNode(tokens[this.cursor++])); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + var body = this.parseBlockSentence(); + node = new FunctionDeclarationAstNode(params, body) + // 函数声明直接调用,与变量的代码一模一样 + while (tokens[this.cursor].type == LEX.LPAREN) { + this.cursor++; + var args = []; + while (tokens[this.cursor].type != LEX.RPAREN) 
{ + args.push(this.parseExpression()); + if (tokens[this.cursor].type == LEX.COMMA) { + this.cursor++; + } + } + this.cursor++; + node = new FunctionCallAstNode(node, args); + } + break; +.... +``` +补齐后的全量代码在`24.11/parser_test11.mjs`。 +# 3 补齐控制流语句 +我们上面一共有四种语句`VarSentence` `ReturnSentence` `BlockSentence`和`ExpressionStatement`,还缺了控制流语句,包括`IfSentence` `ForSentence` `BreakSentence` `ContinueSentence`,其中`for`可以替换掉`while`,所以我们就不实现后者,并且`if else`的嵌套可以实现`if() else if()`,所以我们也不实现`else if`语法了,另外`switch`也可以被`if`平替,所以也不实现`switch`语法。 +## 3.1 IfSentence +`if`语句有两种形式:`if(condition){xxx}`,`if(condition){xxx}else{yyy}`,我们要做的就是`parse`函数中识别`if`关键字,然后`parseIfSentence`即可。后者是主要的逻辑,为识别`(`然后`parseExpression`得到`condition`,最后是一个块语句。 +```js +class IfSentence extends Sentence { + constructor(condition, ifBody, elseBody) { + super(); + this.condition = condition; + this.ifBody = ifBody; + this.elseBody = elseBody; + } + toString() {//省略。。 + } +} +parseIfSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type == LEX.IF, "if sentence need a if"); // if + assert(tokens[this.cursor++].type == LEX.LPAREN, "if sentence need a LPAREN follow if"); // ( + var condition = this.parseExpression(); // condition + assert(tokens[this.cursor++].type == LEX.RPAREN, "if sentence need a RPAREN follow condition");// ) + var ifBody = this.parseBlockSentence(); // {xxx} + if (tokens[this.cursor].type == LEX.ELSE) { + this.cursor++; // else + var elseBody = this.parseBlockSentence(); // {yyy} + } + return new IfSentence(condition, ifBody, elseBody); +} +``` +## 3.2 ForSentence +`for`语句的语法为:`for(init;condition;step){xxx}`,其中`init` `step` `condition`均可以省略,所以在`parse`函数中识别到`for`关键字,即运行`parseForSentence`函数。主要逻辑在该函数中,主要是识别`for`之后是`(`,然后调用`parse`函数识别期望返回一个长度为3的Sentence数组,分别对应`init` `condition` `step`,注意这里要在`parse`函数中加上,遇到`)`就停止识别,直接返回。 -下面是没整理好的代码与思路。 +但是`parse`其实会报错,因为并不能返回三个语句,第三个语句`step`后面不是分号而是`)`,所以要么修改`parseExpressionStatement`,遇到`)`也返回,要么也可以新增一个`parseSentence`为解析下一个语句,如下:
-先来看简单的函数声明,函数声明的格式有两种,一种是`function name(a,b,c){}`,一种是`function(a,b,c){}`,后者是匿名函数。 ```js -class FunctionDeclarationAstNode extends AstNode { - constructor(name, params, body) { +class ForSentence extends Sentence { + constructor(init, condition, step, body) { super(); - this.name = name == null ? null :new IdentifierAstNode(name); - this.params = params; + this.init = init; + this.condition = condition; + this.step = step; this.body = body; } - toString() { - return `function${this.name ? ' ' + this.name.toString() : ''}(${this.params.join(',')})${this.body.map(it=>it.toString()).join('\n')}`; - } +} +class BreakSentence extends Sentence {} +class ContinueSentence extends Sentence {} + +parseForSentence() { + var tokens = this.tokens; + assert(tokens[this.cursor++].type == LEX.FOR, "for sentence need a for"); + assert(tokens[this.cursor++].type == LEX.LPAREN, "for sentence need a LPAREN follow for"); + var init = this.parseSentence(); + assert(tokens[this.cursor++].type == LEX.SEMICOLON, "for sentence error need a SEMICOLON after init"); + var condition = this.parseSentence(); + assert(tokens[this.cursor++].type == LEX.SEMICOLON, "for sentence error need a SEMICOLON after condition"); + var step = this.parseExpression(); + assert(tokens[this.cursor++].type == LEX.RPAREN, "for sentence need a RPAREN follow condition"); + var body = this.parseBlockSentence(); + return new ForSentence(init, condition, step, body); } -function parseExpression(tokens, start, end) { - //.... - function parseFunctionDeclaration() { - assert(tokens[i].type == LEX.FUNCTION); - // 1 函数名识别,null为匿名函数 - var name = tokens[i + 1].type == LEX.IDENTIFIER ? 
tokens[i++] : null; - assert(tokens[i + 1].type == LEX.LPATTEN); - // 2 参数识别,格式就是括号内,identifier,逗号,..循环..右括号结束 - var params = []; - for (var j = i + 2; j < end; j+=2) { - assert(tokens[j].type == LEX.IDENTIFIER); - assert(tokens[j+1].type == LEX.COMMA); - params.push(tokens[j]); - // 右括号结束参数部分 - if (tokens[j].type == LEX.RPATTEN) { - i = j + 1; - break; - } - } - // 3 body识别,按照大括号识别即可,注意有可能有大括号嵌套,所以要记录左大括号出现的数量,当右大括号出现,数量减一。数量为0,就是函数body结束 - assert(tokens[i].type == LEX.LBRACE); - var braceCount = 1; - for (var j = i + 1; j < end; j++) { - if (tokens[j].type == LEX.LBRACE) braceCount++; - if (tokens[j].type == LEX.RBRACE) braceCount--; - // 函数结束 - if (braceCount == 0) { - var body = parseSentences(tokens, i, j + 1); - i = j + 1; - return new FunctionDeclarationAstNode(name, params, body); - } +parse() { + var sentences = []; + for (;;) { + var item = this.parseSentence(); + if (item == null) break; + if (item instanceof EmptySentence) { + continue; } + sentences.push(item); } + return sentences; } - -``` -函数调用,函数调用主要就是将每个参数进行表达式的解析,参数是用逗号分开的`func1(a,b,c)`一般找到一个逗号或者`)`就可以切成一个参数了,但是考虑到有可能参数中嵌套函数,`func1(func2(a,b), c))`,此时用逗号直接切分是不行的,简单的方法考虑到逗号只出现在函数的参数和数组中,而这两种情况都是有`()`和`[]`包裹的,所以可以在没有括号包裹的情况下,出现逗号或者出现`)`,认为是参数的切分标志。代码如下: -```js -class FunctionCallAstNode extends AstNode { - constructor(name, args) { - super(); - this.name = new IdentifierAstNode(name); - this.args = args; // args是ast数组 - } - toString() { - return `${this.name.toString()}(${this.args.map(it=>it.toString()).join(',')})` +parseSentence() { + var token = tokens[this.cursor]; + if (token.type === LEX.SEMICOLON) { + this.cursor++; + return new EmptySentence(); + } else if (token.type === LEX.EOF || token.type === LEX.RBRACE || token.type === LEX.RPAREN) { + return null; + } if (token.type === LEX.VAR) { + return this.parseVarSentence(); + } else if (token.type === LEX.RETURN) { + return this.parseReturnSentence(); + } else if (token.type === LEX.LBRACE) { + return this.parseBlockSentence(); + } else if 
(token.type === LEX.IF) { + return this.parseIfSentence(); + } else if (token.type === LEX.FOR) { + return this.parseForSentence(); + } else if (token.type === LEX.BREAK) { + return new BreakSentence(); + } else if (token.type === LEX.CONTINUE) { + return new ContinueSentence(); + } else { + return this.parseExpressionStatement(); } } +``` +在有了强大的`parse`和`parseExpression`函数加持下,是不是`if`和`for`语句的解析都变简单了。 +# 4 整理一下结构 +## 4.1 让Sentence继承AstNode +一开始我们准备了`AstNode`是专门针对表达式的,所以最开始我们区分了`AstNode`针对表达式,而`Sentence`针对语句,为什么现在让`Sentence`也继承自`AstNode`呢? -function parseExpresstion(tokens, start, end, precedence=0) { - // ...... - function parseFunctionCall() { - assert(tokens[i].type == LEX.IDENTIFIER); // 函数名 - assert(tokens[i + 1].type == LEX.LPAREN); // 左括号 - var nameTk = tokens[i]; - i = i + 2; // 此时i位于第一个参数的start位置 - - // 识别参数要找逗号来隔开每个参数的表达式,分别去递归解析 - var args = []; - var innerPattern = 0, innerBracket = 0; - for (var j = i; j < end; j++) { - if (tokens[j].type == LEX.LPAREN) innerPattern++; - if (tokens[j].type == LEX.RPAREN) innerPattern--; - if (tokens[j].type == LEX.LBRACE) innerBracket++; - if (tokens[j].type == LEX.RBRACE) innerBracket--; - // 最后一个参数 - if (innerPattern == -1) { - args.push(parseExpression(tokens, i, j)); - i = j + 1; - return new FunctionCallAstNode(nameTk, args); - } - // 出现逗号,并且不在内部的()或者[]中,说明是参数的结束 - if (tokens[j].type == LEX.COMMA && innerPattern == 0) { - args.push(parseExpression(tokens, i, j)); - i = j + 1; - } - } - throw new Error("unexpected end of expression"); - } +这是因为后续在求值的过程中能有更好的、统一的抽象。 +```js +class Sentence extends AstNode { + //.... } ``` \ No newline at end of file