diff --git a/include/dragon/token.h b/include/dragon/token.h
index 6f624a9..bd77ce9 100644
--- a/include/dragon/token.h
+++ b/include/dragon/token.h
@@ -1,130 +1,110 @@
-#ifndef TOKEN_H
-#define TOKEN_H
+#pragma once
 
-#include <stddef.h>
+#include <string>
+#include <format>
 
-typedef enum {
+enum class TokenType;
+
+class Token {
+public:
+    TokenType type;
+    std::string value;
+    size_t line;
+    size_t column;
+
+    Token(TokenType type, std::string value, size_t line, size_t column) {
+        this->type = type;
+        this->value = value;
+        this->line = line;
+        this->column = column;
+    }
+
+    Token(TokenType type, std::string value) {
+        this->type = type;
+        this->value = value;
+        this->line = 0;
+        this->column = 0;
+    }
+
+    Token(TokenType type) {
+        this->type = type;
+        this->value = "";
+        this->line = 0;
+        this->column = 0;
+    }
+
+    inline bool operator==(const Token& other) const {
+        return this->type == other.type && this->value == other.value;
+    }
+
+    inline bool operator!=(const Token& other) const {
+        return this->type != other.type || this->value != other.value;
+    }
+
+    inline std::string to_string() {
+        if (this->line == 0 && this->column == 0 && this->value == "") {
+            return std::format("Token({})", this->type);
+        }
+
+        if (this->line == 0 && this->column == 0) {
+            return std::format("Token({}, {})", this->type, this->value);
+        }
+
+        if (this->value == "") {
+            return std::format("Token({}, {}, {})", this->type, this->line, this->column);
+        }
+
+        return std::format("Token({}, {}, {}, {})", this->type, this->value, this->line, this->column);
+    }
+};
+
+enum class TokenType {
     // Keywords
-    TOKEN_LET_KW,       // "let"
-    TOKEN_MUT_KW,       // "mut"
-    TOKEN_INT_KW,       // "int"
-    TOKEN_IF_KW,        // "if"
-    TOKEN_ELSE_KW,      // "else"
-    TOKEN_FOR_KW,       // "for"
-    TOKEN_IN_KW,        // "in"
-    TOKEN_FUNC_KW,      // "func"
-    TOKEN_RETURN_KW,    // "return"
-    TOKEN_WHILE_KW,     // "while"
-    TOKEN_TRUE_KW,      // "true"
-    TOKEN_FALSE_KW,     // "false"
-    TOKEN_BOOL_KW,      // "bool"
-    TOKEN_BREAK_KW,     // "break"
-    TOKEN_CONTINUE_KW,  // "continue"
-    TOKEN_STRUCT_KW,    // "struct"
-    TOKEN_ENUM_KW,      // "enum"
-    TOKEN_TYPE_KW,      // "type"
-    TOKEN_MATCH_KW,     // "match"
-    TOKEN_IMPORT_KW,    // "import"
-    TOKEN_AS_KW,        // "as"
+    Let,
+    Mut,
+    If,
+    Else,
+    While,
+    For,
+    In,
+    True,
+    False,
 
     // Literals
-    TOKEN_INTEGER,      // 123
-    TOKEN_FLOAT,        // 123.45
-    TOKEN_IDENTIFIER,   // variable_name
-    TOKEN_STRING,       // "string"
-    TOKEN_CHAR,         // 'c'
+    IntegerLiteral,
+    StringLiteral,
+    Identifier,
 
     // Symbols
-    TOKEN_EQUALS,       // =
-    TOKEN_PLUS,         // +
-    TOKEN_MINUS,        // -
-    TOKEN_ASTERISK,     // *
-    TOKEN_SLASH,        // /
-    TOKEN_MODULO,       // %
-    TOKEN_AND,          // &&
-    TOKEN_OR,           // ||
-    TOKEN_NOT,          // !
-    TOKEN_EQUALITY,     // ==
-    TOKEN_NOT_EQ,       // !=
-    TOKEN_GRT,          // >
-    TOKEN_LSS,          // <
-    TOKEN_LTE,          // <=
-    TOKEN_GTE,          // >=
-    TOKEN_LSHIFT,       // <<
-    TOKEN_RSHIFT,       // >>
-    TOKEN_AMPERSAND,    // &
-    TOKEN_PIPE,         // |
-    TOKEN_CARET,        // ^
-    TOKEN_TILDE,        // ~
-    TOKEN_BRACE_OPEN,   // {
-    TOKEN_BRACE_CLOSE,  // }
-    TOKEN_PAREN_OPEN,   // (
-    TOKEN_PAREN_CLOSE,  // )
-    TOKEN_COMMA,        // ,
-    TOKEN_SEMICOLON,    // ;
-    TOKEN_COLON,        // :
-    TOKEN_DOT,          // .
-    TOKEN_RANGE,        // ..
-    TOKEN_DOUBLE_RIGHT_ARROW, // =>
-    TOKEN_RIGHT_ARROW,  // ->
-
-    // Misc
-    TOKEN_COMMENT,      // Comment
-    TOKEN_EOF,          // End of file
-    TOKEN_INVALID       // Invalid token
-} TokenType;
-
-static const char* keywords[] = {
-    // Variable Declarations
-    "let",
-    "mut",
-
-    // Data Types
-    "int",
-    "float",
-    "bool",
-    "char",
-
-    // Control Flow
-    "if",
-    "else",
-    "for",
-    "in",
-    "while",
-    "break",
-    "continue",
-
-    // Boolean Literals
-    "true",
-    "false",
-
-    // Functions
-    "func",
-    "return",
-
-    // Modules and Types
-    "import",
-    "struct",
-    "enum",
-    "type",
-    "match",
-    "as"
+    Plus,
+    Minus,
+    Star,
+    Slash,
+    And,
+    Or,
+    Not,
+    Equals,
+    NotEquals,
+    LessThan,
+    GreaterThan,
+    LessThanOrEqualTo,
+    GreaterThanOrEqualTo,
+    Assign,
+    LeftParen,
+    RightParen,
+    LeftBrace,
+    RightBrace,
+    LeftBracket,
+    RightBracket,
+    Comma,
+    Dot,
+    Range,
+    Ampersand,
+    Pipe,
+    Caret,
+    Tilde,
+
+    // Misc
+    Comment,
+    Unknown,
 };
-
-
-typedef struct {
-    TokenType type;
-    char* value;
-} Token;
-
-typedef struct {
-    Token* tokens;
-    size_t count;
-    size_t capacity;
-} TokenList;
-
-TokenList* create_token_list();
-void append_token(TokenList* list, Token token);
-void free_tokens(TokenList* list);
-
-#endif
\ No newline at end of file
diff --git a/src/token.cpp b/src/token.cpp
deleted file mode 100644
index b152832..0000000
--- a/src/token.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "dragon/token.h"
-#include <stdlib.h>
-#include <string.h>
-
-TokenList* create_token_list() {
-    TokenList* list = (TokenList*)malloc(sizeof(TokenList));
-    list->tokens = (Token*)malloc(sizeof(Token) * 8);
-    list->count = 0;
-    list->capacity = 8;
-    return list;
-}
-
-void append_token(TokenList* list, Token token) {
-    if (list->count >= list->capacity) {
-        list->capacity *= 2;
-        list->tokens = (Token*)realloc(list->tokens, sizeof(Token) * list->capacity);
-    }
-    list->tokens[list->count++] = token;
-}
-
-void free_tokens(TokenList* list) {
-    for (size_t i = 0; i < list->count; i++) {
-        free(list->tokens[i].value);
-    }
-    free(list->tokens);
-    free(list);
-}
diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp
index f135ef1..4162e98 100644
--- a/tests/lexer_test.cpp
+++ b/tests/lexer_test.cpp
@@ -76,7 +76,7 @@ TEST(LexerTests, StringLiterals) {
         "\"Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|\""
     };
-    const std::array<Token, 3> validTokens = {
+    const std::array<Token, 3> validTokens = {
         Token(TokenType::StringLiteral, "Enter username: "),
         Token(TokenType::StringLiteral, "This is a string with a escape characters \" \n \t "),
         Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|")
@@ -85,7 +85,7 @@
     Lexer lexer;
 
     for (size_t i = 0; i < input.size(); i++) {
-        Token t = lexer.lex_string_literal(input[i]);
+        Token t = lexer.lex_string(input[i]);
         ASSERT_TRUE(validTokens[i] == t);
     }
 }
@@ -113,42 +113,53 @@ TEST(LexerTests, Integer) {
     }
 }
 
-// Test for operators
-TEST(LexerTests, Operators) {
-    const std::array<std::string, 5> input = {
-        "+", "-", "*", "/", "="
-    };
-    const std::array<Token, 5> validTokens = {
+// Test for all symbols
+TEST(LexerTests, Symbols) {
+    const std::array<std::string, 18> input = {
+        "+",
+        "-",
+        "*",
+        "/",
+        "=",
+        "==",
+        "!=",
+        "<",
+        ">",
+        "<=",
+        ">=",
+        "&&",
+        "||",
+        "!",
+        "&",
+        "|",
+        "^",
+        "~"
+    };
+    const std::array<Token, 18> validTokens = {
         Token(TokenType::Plus, "+"),
         Token(TokenType::Minus, "-"),
-        Token(TokenType::Asterisk, "*"),
+        Token(TokenType::Star, "*"),
         Token(TokenType::Slash, "/"),
-        Token(TokenType::Equal, "=")
-    };
-
-    Lexer lexer;
-
-    for (size_t i = 0; i < input.size(); i++) {
-        Token t = lexer.lex_operator(input[i]);
-        ASSERT_TRUE(validTokens[i] == t);
-    }
-}
-
-// Test for punctuation
-TEST(LexerTests, Punctuation) {
-    const std::array<std::string, 3> input = {
-        "{", "}", ";"
-    };
-    const std::array<Token, 3> validTokens = {
-        Token(TokenType::LeftBrace, "{"),
-        Token(TokenType::RightBrace, "}"),
-        Token(TokenType::Semicolon, ";")
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Equals, "=="),
+        Token(TokenType::NotEquals, "!="),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::GreaterThan, ">"),
+        Token(TokenType::LessThanOrEqualTo, "<="),
+        Token(TokenType::GreaterThanOrEqualTo, ">="),
+        Token(TokenType::And, "&&"),
+        Token(TokenType::Or, "||"),
+        Token(TokenType::Not, "!"),
+        Token(TokenType::Ampersand, "&"),
+        Token(TokenType::Pipe, "|"),
+        Token(TokenType::Caret, "^"),
+        Token(TokenType::Tilde, "~")
     };
 
     Lexer lexer;
 
     for (size_t i = 0; i < input.size(); i++) {
-        Token t = lexer.lex_punctuation(input[i]);
+        Token t = lexer.lex_symbol(input[i]);
         ASSERT_TRUE(validTokens[i] == t);
     }
 }
@@ -196,7 +207,7 @@ TEST(LexerTests, Arithmetic) {
         "1 * 2",
         "1 / 2"
     };
-    const std::array<std::vector<Token>, 4> validTokens = {
+    const std::vector<std::vector<Token>> validTokens = {
         {
             Token(TokenType::IntegerLiteral, "1"),
             Token(TokenType::Plus, "+"),
             Token(TokenType::IntegerLiteral, "2")
@@ -236,7 +247,7 @@ TEST(LexerTests, Boolean) {
         "true == false"
         "true != false"
     };
-    const std::array<std::vector<Token>, 4> validTokens = {
+    const std::vector<std::vector<Token>> validTokens = {
         {
             Token(TokenType::True, "true"),
             Token(TokenType::And, "&&"),
             Token(TokenType::False, "false")
         },
         {
             Token(TokenType::True, "true"),
             Token(TokenType::Or, "||"),
             Token(TokenType::False, "false")
         },
@@ -248,12 +259,12 @@ TEST(LexerTests, Boolean) {
         {
-            Token(TokenType::Bang, "!"),
+            Token(TokenType::Not, "!"),
             Token(TokenType::True, "true")
         },
         {
             Token(TokenType::True, "true"),
-            Token(TokenType::EqualEqual, "=="),
+            Token(TokenType::Equals, "=="),
             Token(TokenType::False, "false")
         }
     };
@@ -274,25 +285,25 @@ TEST(LexerTests, Relational) {
         "1 <= 2",
         "1 >= 2"
    };
-    const std::array<std::vector<Token>, 4> validTokens = {
+    const std::vector<std::vector<Token>> validTokens = {
         {
             Token(TokenType::IntegerLiteral, "1"),
-            Token(TokenType::Less, "<"),
+            Token(TokenType::LessThan, "<"),
             Token(TokenType::IntegerLiteral, "2")
         },
         {
             Token(TokenType::IntegerLiteral, "1"),
-            Token(TokenType::Greater, ">"),
+            Token(TokenType::GreaterThan, ">"),
             Token(TokenType::IntegerLiteral, "2")
         },
         {
             Token(TokenType::IntegerLiteral, "1"),
-            Token(TokenType::LessEqual, "<="),
+            Token(TokenType::LessThanOrEqualTo, "<="),
             Token(TokenType::IntegerLiteral, "2")
         },
         {
             Token(TokenType::IntegerLiteral, "1"),
-            Token(TokenType::GreaterEqual, ">="),
+            Token(TokenType::GreaterThanOrEqualTo, ">="),
             Token(TokenType::IntegerLiteral, "2")
         }
     };
@@ -313,7 +324,7 @@ TEST(LexerTests, Bitwise) {
         "1 ^ 2",
         "~1"
     };
-    const std::array<std::vector<Token>, 4> validTokens = {
+    const std::vector<std::vector<Token>> validTokens = {
         {
             Token(TokenType::IntegerLiteral, "1"),
             Token(TokenType::Ampersand, "&"),
@@ -356,20 +367,20 @@ TEST(LexerTests, Mixed) {
         Token(TokenType::IntegerLiteral, "4"),
         Token(TokenType::Minus, "-"),
         Token(TokenType::IntegerLiteral, "5"),
-        Token(TokenType::EqualEqual, "=="),
-        Token(TokenType::Bang, "!"),
+        Token(TokenType::Equals, "=="),
+        Token(TokenType::Not, "!"),
         Token(TokenType::True, "true"),
         Token(TokenType::And, "&&"),
         Token(TokenType::IntegerLiteral, "7"),
-        Token(TokenType::Less, "<"),
+        Token(TokenType::LessThan, "<"),
         Token(TokenType::IntegerLiteral, "8"),
         Token(TokenType::Or, "||"),
         Token(TokenType::IntegerLiteral, "9"),
-        Token(TokenType::Greater, ">"),
+        Token(TokenType::GreaterThan, ">"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::And, "&&"),
         Token(TokenType::IntegerLiteral, "11"),
-        Token(TokenType::LessEqual, "<="),
+        Token(TokenType::LessThanOrEqualTo, "<="),
         Token(TokenType::IntegerLiteral, "12"),
         Token(TokenType::Pipe, "|"),
         Token(TokenType::IntegerLiteral, "13"),
@@ -408,7 +419,7 @@ TEST (LexerTests, VariableDeclarationWithExpr) {
         Token(TokenType::Let, "let"),
         Token(TokenType::Identifier, "variable"),
         Token(TokenType::Identifier, "int"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::IntegerLiteral, "1"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "2")
@@ -426,7 +437,7 @@ TEST(LexerTests, VariableDeclarationWithoutType) {
     const std::vector<Token> validTokens = {
         Token(TokenType::Let, "let"),
         Token(TokenType::Identifier, "variable"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::IntegerLiteral, "1"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "2")
@@ -446,7 +457,7 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) {
         Token(TokenType::Mut, "mut"),
         Token(TokenType::Identifier, "variable"),
         Token(TokenType::Identifier, "int"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::IntegerLiteral, "1"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "2")
@@ -481,7 +492,7 @@ TEST(LexerTests, MutableVariableDeclarationWithoutType) {
         Token(TokenType::Let, "let"),
         Token(TokenType::Mut, "mut"),
         Token(TokenType::Identifier, "variable"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::IntegerLiteral, "1"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "2")
@@ -498,7 +509,7 @@ TEST(LexerTests, Assignment) {
     const std::string input = "variable = 1 + 2";
     const std::vector<Token> validTokens = {
         Token(TokenType::Identifier, "variable"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::IntegerLiteral, "1"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "2")
@@ -558,11 +569,11 @@ TEST(LexerTests, WhileLoop) {
     const std::vector<Token> validTokens = {
         Token(TokenType::While, "while"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Less, "<"),
+        Token(TokenType::LessThan, "<"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -587,7 +598,7 @@ TEST(LexerTests, ForLoop) {
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "i"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "i"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -616,7 +627,7 @@ TEST(LexerTests, ForLoopWithExpr) {
         Token(TokenType::RightParen, ")"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "i"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "i"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -635,11 +646,11 @@ TEST(LexerTests, IfStatement) {
     const std::vector<Token> validTokens = {
         Token(TokenType::If, "if"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Less, "<"),
+        Token(TokenType::LessThan, "<"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -658,11 +669,11 @@ TEST(LexerTests, ElseIfStatement) {
     const std::vector<Token> validTokens = {
         Token(TokenType::If, "if"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Less, "<"),
+        Token(TokenType::LessThan, "<"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -670,11 +681,11 @@
         Token(TokenType::Else, "else"),
         Token(TokenType::If, "if"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Greater, ">"),
+        Token(TokenType::GreaterThan, ">"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Minus, "-"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -693,11 +704,11 @@ TEST(LexerTests, ElseStatement) {
     const std::vector<Token> validTokens = {
         Token(TokenType::If, "if"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Less, "<"),
+        Token(TokenType::LessThan, "<"),
         Token(TokenType::IntegerLiteral, "10"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Plus, "+"),
         Token(TokenType::IntegerLiteral, "1"),
@@ -705,7 +716,7 @@
         Token(TokenType::Else, "else"),
         Token(TokenType::LeftBrace, "{"),
         Token(TokenType::Identifier, "x"),
-        Token(TokenType::Equal, "="),
+        Token(TokenType::Assign, "="),
         Token(TokenType::Identifier, "x"),
         Token(TokenType::Minus, "-"),
         Token(TokenType::IntegerLiteral, "1"),
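
Note on the new `Token::to_string`: it passes `this->type` (a scoped enum) directly to `std::format`, which only compiles if a `std::formatter<TokenType>` specialization is visible. No such specialization appears in this diff, so the sketch below is an assumption about how one could look, mapping enumerators to their names; it is not part of the change itself.

```cpp
#include <format>
#include <string_view>

#include "dragon/token.h"

// Hypothetical formatter for TokenType (not included in this diff).
// Reusing the string_view formatter keeps width/fill format specs working.
template <>
struct std::formatter<TokenType> : std::formatter<std::string_view> {
    auto format(TokenType type, std::format_context& ctx) const {
        std::string_view name = "Unknown";
        switch (type) {
            case TokenType::Let:            name = "Let"; break;
            case TokenType::Identifier:     name = "Identifier"; break;
            case TokenType::IntegerLiteral: name = "IntegerLiteral"; break;
            case TokenType::Assign:         name = "Assign"; break;
            // ... remaining enumerators would be listed the same way
            default:                        break;
        }
        return std::formatter<std::string_view>::format(name, ctx);
    }
};
```

An alternative would be to cast the enum to its underlying integer inside `to_string` and print the numeric value, which avoids the specialization but loses readable names in test output.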
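With `src/token.cpp` and the C-style `TokenList` removed, token storage presumably moves to a standard container at the call sites. Nothing in this diff confirms the replacement, so the following is only a sketch of the intended usage pattern with the new `Token` API.

```cpp
#include <vector>

#include "dragon/token.h"

int main() {
    // std::vector takes over from the hand-rolled create_token_list /
    // append_token / free_tokens trio: growth and cleanup are automatic.
    std::vector<Token> tokens;
    tokens.push_back(Token(TokenType::Let, "let"));
    tokens.push_back(Token(TokenType::Identifier, "x"));
    tokens.push_back(Token(TokenType::Assign, "="));
    tokens.push_back(Token(TokenType::IntegerLiteral, "1"));

    // Token::operator== compares type and value only, ignoring position,
    // which is what the lexer tests above rely on.
    return tokens.front() == Token(TokenType::Let, "let") ? 0 : 1;
}
```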