From 9bdded005f67ad23ce55bf42593583cdc44fe7ec Mon Sep 17 00:00:00 2001 From: park671 Date: Wed, 23 Oct 2024 19:22:01 +0800 Subject: [PATCH] feature: syntaxer partly support pointer --- compiler/ast.h | 45 ++++++++++++++++++++++++++++++++-- compiler/lexer.cpp | 57 +++++++++++++++++++++++++++++++------------ compiler/mir.cpp | 10 +++++--- compiler/mir.h | 4 ++- compiler/syntaxer.cpp | 25 ++++++++++++++++--- compiler/token.h | 4 ++- 6 files changed, 119 insertions(+), 26 deletions(-) diff --git a/compiler/ast.h b/compiler/ast.h index 016244e..0de8659 100644 --- a/compiler/ast.h +++ b/compiler/ast.h @@ -5,6 +5,8 @@ #ifndef PCC_CC_AST_H #define PCC_CC_AST_H +#include "stdint.h" + enum PrimitiveType { TYPE_UNKNOWN, @@ -72,17 +74,32 @@ enum ArithmeticFactorType { enum ExpressionType { EXPRESSION_ASSIGNMENT, - EXPRESSION_ARITHMETIC + EXPRESSION_ARITHMETIC, + EXPRESSION_POINTER, }; struct AstType { + bool isPointer; PrimitiveType primitiveType; }; +enum IdentityType { + ID_METHOD, + ID_VAR, + ID_ARRAY, +}; + struct AstIdentity { + IdentityType type; const char *name; }; +struct AstArray { + PrimitiveType primitiveType; + int size; + void *buffer; +}; + struct AstParamDefine { AstType *type; AstIdentity *identity; @@ -171,6 +188,28 @@ struct AstExpressionArithmetic { struct AstExpression; +enum ExpressionPointerType { + EXP_POINTER_CALC, + EXP_POINTER_ARRAY, +}; + +struct AstExpressionPointerCalc { + //& + AstIdentity *identity; +}; + +struct AstExpressionPointerArray { + AstArray *array; +}; + +struct AstExpressionPointer { + ExpressionPointerType type; + union { + AstExpressionPointerCalc *pointerCalc; + AstExpressionPointerArray *pointerArray; + }; +}; + struct AstExpressionAssignment { AstIdentity *identity; //= @@ -182,6 +221,7 @@ struct AstExpression { union { AstExpressionAssignment *assignmentExpression; AstExpressionArithmetic *arithmeticExpression; + AstExpressionPointer *pointerExpression; }; }; @@ -200,7 +240,7 @@ struct AstStatementDefine { }; struct AstStatementMethodCall { - PrimitiveType retType; + AstType *retType; AstIdentity *identity; //( AstObjectList *objectList; @@ -320,6 +360,7 @@ enum AstNodeType { NODE_STATEMENT_METHOD_CALL, NODE_OBJECT_LIST, NODE_EXPRESSION, + NODE_EXPRESSION_POINTER, NODE_EXPRESSION_ASSIGNMENT, NODE_EXPRESSION_ARITHMETIC, NODE_EXPRESSION_ARITHMETIC_MORE, diff --git a/compiler/lexer.cpp b/compiler/lexer.cpp index eb77ac9..0f74c85 100644 --- a/compiler/lexer.cpp +++ b/compiler/lexer.cpp @@ -52,8 +52,10 @@ const char *getTokenTypeName(TokenType tokenType) { return "bool"; case TOKEN_CHARS: return "chars"; - case TOKEN_POINTER: + case TOKEN_POINTER_TYPE: return "pointer"; + case TOKEN_POINTER_OPERATOR: + return "pointer_op"; } return "unknown"; } @@ -115,6 +117,10 @@ inline static bool isPointer(char a) { return a == '*'; } +inline static bool isGetAddress(char a) { + return a == '&'; +} + inline static char *makeCharsCopy(const char *origin) { int length = strlen(origin) + 1; char *copy = (char *) pccMalloc(VAR_TAG, sizeof(char) * length); @@ -206,7 +212,18 @@ static Token *lexer(Token *tail, const char *buffer) { continue; } else if (isPointer(buffer[i])) { if (tail->tokenType == TOKEN_TYPE) { - tail->tokenType = TOKEN_POINTER; + tail->tokenType = TOKEN_POINTER_TYPE; + continue; + } else { + tail = processCompletedString(tail, tmp); + Token *token = (Token *) (pccMalloc(LEXER_TAG, sizeof(Token))); + token->tokenType = TOKEN_POINTER_OPERATOR; + char *content = (char *) pccMalloc(VAR_TAG, sizeof(char) * 2); + content[0] = buffer[i]; + content[1] = '\0'; + token->content = content; + tail->next = token; + tail = token; continue; } } else if (isBoundary(buffer[i])) { @@ -244,20 +261,30 @@ static Token *lexer(Token *tail, const char *buffer) { tail->next = token; tail = token; } - } else if (isBoolOperator(buffer[i])) { + } else if (isBoolOperator(buffer[i]) + && i + 1 < length + && buffer[i + 1] == buffer[i]) { tail = processCompletedString(tail, tmp); - if (i + 1 < length && buffer[i + 1] == buffer[i]) { - Token *token = (Token *) (pccMalloc(LEXER_TAG, sizeof(Token))); - token->tokenType = TOKEN_BOOL; - char *content = (char *) pccMalloc(VAR_TAG, sizeof(char) * 3); - content[0] = buffer[i]; - content[1] = buffer[i + 1]; - content[2] = '\0'; - token->content = content; - tail->next = token; - tail = token; - i++; - } + Token *token = (Token *) (pccMalloc(LEXER_TAG, sizeof(Token))); + token->tokenType = TOKEN_BOOL; + char *content = (char *) pccMalloc(VAR_TAG, sizeof(char) * 3); + content[0] = buffer[i]; + content[1] = buffer[i + 1]; + content[2] = '\0'; + token->content = content; + tail->next = token; + tail = token; + i++; + } else if (isGetAddress(buffer[i])) { + tail = processCompletedString(tail, tmp); + Token *token = (Token *) (pccMalloc(LEXER_TAG, sizeof(Token))); + token->tokenType = TOKEN_POINTER_OPERATOR; + char *content = (char *) pccMalloc(VAR_TAG, sizeof(char) * 2); + content[0] = buffer[i]; + content[1] = '\0'; + token->content = content; + tail->next = token; + tail = token; } else { snprintf(tmp, 256, "%s%c", tmp, buffer[i]); } diff --git a/compiler/mir.cpp b/compiler/mir.cpp index 3f7dbd6..e9fd2fb 100644 --- a/compiler/mir.cpp +++ b/compiler/mir.cpp @@ -182,7 +182,11 @@ static const char *convertBoolOpString(MirBooleanOperator mirBooleanOperator) { } -MirOperandType convertAstType2MirType(PrimitiveType primitiveType) { +MirOperandType convertAstType2MirType(AstType *retType) { + if (retType->isPointer) { + return OPERAND_POINTER; + } + PrimitiveType primitiveType = retType->primitiveType; switch (primitiveType) { case TYPE_CHAR: { return OPERAND_INT8; @@ -1034,7 +1038,7 @@ void generateParam(AstParamList *astParamList, MirMethod *mirMethod) { } } addVarInfo(mirMethodParam->paramName, - convertAstType2MirType(curAstParamList->paramDefine->type->primitiveType)); + convertAstType2MirType(curAstParamList->paramDefine->type)); curAstParamList = curAstParamList->next; } @@ -1050,7 +1054,7 @@ void generateParam(AstParamList *astParamList, MirMethod *mirMethod) { void generateMethod(AstMethodDefine *astMethodDefine, MirMethod *mirMethod) { mirMethod->label = astMethodDefine->identity->name; // logd(MIR_TAG, "--- mir method:%s", mirMethod->label); - addMethodInfo(astMethodDefine->identity->name, convertAstType2MirType(astMethodDefine->type->primitiveType)); + addMethodInfo(astMethodDefine->identity->name, convertAstType2MirType(astMethodDefine->type)); if (astMethodDefine->paramList != nullptr) { //var in method params need stack generateParam(astMethodDefine->paramList, mirMethod); diff --git a/compiler/mir.h b/compiler/mir.h index 6a4a16e..e2e2144 100644 --- a/compiler/mir.h +++ b/compiler/mir.h @@ -93,7 +93,9 @@ enum MirOperandType { OPERAND_INT64, OPERAND_FLOAT32, - OPERAND_FLOAT64 + OPERAND_FLOAT64, + + OPERAND_POINTER }; struct MirOperand { diff --git a/compiler/syntaxer.cpp b/compiler/syntaxer.cpp index a5d9e41..497cc88 100644 --- a/compiler/syntaxer.cpp +++ b/compiler/syntaxer.cpp @@ -108,11 +108,12 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { case NODE_METHOD_DEFINE: { AstMethodDefine *astMethodDefine = (AstMethodDefine *) currentNode; //method type - if (token->tokenType != TOKEN_TYPE) { + if (token->tokenType != TOKEN_TYPE && token->tokenType != TOKEN_POINTER_TYPE) { loge(SYNTAX_TAG, "[-]error: method define need type: %s", token->content); return nullptr; } astMethodDefine->type = (AstType *) pccMalloc(SYNTAX_TAG, sizeof(AstType)); + astMethodDefine->type->isPointer = (token->tokenType == TOKEN_POINTER_TYPE); astMethodDefine->type->primitiveType = convertTokenType2PrimitiveType(token->content); token = token->next; //method name @@ -122,6 +123,7 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { } astMethodDefine->identity = (AstIdentity *) pccMalloc(SYNTAX_TAG, sizeof(AstIdentity)); astMethodDefine->identity->name = token->content; + astMethodDefine->identity->type = ID_METHOD; token = token->next; //method ( if (token->tokenType != TOKEN_BOUNDARY || strcmp(token->content, "(") != 0) { @@ -165,12 +167,13 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { } case NODE_PARAM_DEFINE: { AstParamDefine *astParamDefine = (AstParamDefine *) currentNode; - if (token->tokenType != TOKEN_TYPE) { + if (token->tokenType != TOKEN_TYPE && token->tokenType != TOKEN_POINTER_TYPE) { loge(SYNTAX_TAG, "[-]error: param define need type: %s", token->content); return nullptr; } astParamDefine->type = (AstType *) pccMalloc(SYNTAX_TAG, sizeof(AstType)); astParamDefine->type->primitiveType = convertTokenType2PrimitiveType(token->content); + astParamDefine->type->isPointer = (token->tokenType == TOKEN_POINTER_TYPE); token = token->next; if (token->tokenType != TOKEN_IDENTIFIER) { loge(SYNTAX_TAG, "[-]error: param define need identifier: %s", token->content); @@ -178,6 +181,7 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { } astParamDefine->identity = (AstIdentity *) pccMalloc(SYNTAX_TAG, sizeof(AstIdentity)); astParamDefine->identity->name = token->content; + astParamDefine->identity->type = ID_VAR; addVar(astParamDefine->identity); token = token->next; break; @@ -247,12 +251,13 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { token = token->next; token = travelAst(token, astStatement->forStatement, NODE_STATEMENT_RETURN); } - } else if (token->tokenType == TOKEN_TYPE) { + } else if (token->tokenType == TOKEN_TYPE || token->tokenType == TOKEN_POINTER_TYPE) { astStatement->statementType = STATEMENT_DEFINE; astStatement->defineStatement = (AstStatementDefine *) pccMalloc(SYNTAX_TAG, sizeof(AstStatementDefine)); astStatement->defineStatement->type = (AstType *) pccMalloc(SYNTAX_TAG, sizeof(AstType)); astStatement->defineStatement->type->primitiveType = convertTokenType2PrimitiveType(token->content); + astStatement->defineStatement->type->isPointer = (token->tokenType == TOKEN_POINTER_TYPE); token = token->next; if (token->tokenType != TOKEN_IDENTIFIER) { loge(SYNTAX_TAG, "[-]error: var define need identifier: %s", token->content); @@ -338,6 +343,11 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { loge(SYNTAX_TAG, "[-]error: undefined var: %s", token->content); } break; + } else if(token->tokenType == TOKEN_POINTER_OPERATOR || token->tokenType == TOKEN_CHARS) { + astExpression->expressionType = EXPRESSION_POINTER; + astExpression->pointerExpression = (AstExpressionPointer *) pccMalloc(SYNTAX_TAG, + sizeof(AstExpressionPointer)); + token = travelAst(token, astExpression->pointerExpression, NODE_EXPRESSION_POINTER); } else { astExpression->expressionType = EXPRESSION_ARITHMETIC; astExpression->arithmeticExpression = (AstExpressionArithmetic *) pccMalloc(SYNTAX_TAG, @@ -346,6 +356,10 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { } break; } + case NODE_EXPRESSION_POINTER: { + + break; + } case NODE_EXPRESSION_ASSIGNMENT: { AstExpressionAssignment *astExpressionAssignment = (AstExpressionAssignment *) currentNode; astExpressionAssignment->identity = (AstIdentity *) pccMalloc(SYNTAX_TAG, sizeof(AstIdentity)); @@ -641,6 +655,9 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { } else if (token->tokenType == TOKEN_BOUNDARY && strcmp(token->content, "(")) { loge(SYNTAX_TAG, "[-]error: not impl yet: %s", token->content); return nullptr; + } else if (token->tokenType == TOKEN_CHARS) { + loge(SYNTAX_TAG, "[-]error: not impl yet: %s", token->content); + return nullptr; } else { loge(SYNTAX_TAG, "[-]error: except valid identifier or num: %s", token->content); return nullptr; @@ -659,7 +676,7 @@ Token *travelAst(Token *token, void *currentNode, AstNodeType nodeType) { astStatementMethodCall->identity->name); exit(1); } - astStatementMethodCall->retType = methodDefine->type->primitiveType; + astStatementMethodCall->retType = methodDefine->type; //check next token if (token->tokenType != TOKEN_BOUNDARY || strcmp(token->content, "(") != 0) { loge(SYNTAX_TAG, "[-]error: method call need (: %s", token->content); diff --git a/compiler/token.h b/compiler/token.h index 4014cdb..243bb16 100644 --- a/compiler/token.h +++ b/compiler/token.h @@ -10,13 +10,15 @@ enum TokenType { TOKEN_BOUNDARY, TOKEN_OPERATOR, TOKEN_OPERATOR_2, + TOKEN_POINTER_OPERATOR,//* & TOKEN_BOOL, TOKEN_INTEGER, TOKEN_FLOAT, TOKEN_CHARS, + TOKEN_ARRAY,//todo: impl this TOKEN_KEYWORD, TOKEN_TYPE, - TOKEN_POINTER, + TOKEN_POINTER_TYPE,//field same to type, but means this type's pointer TOKEN_IDENTIFIER };