diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ed8040..236b8a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,11 +10,10 @@ include_directories(include)
 set(SOURCES
     src/lexer.cpp
-    src/token.cpp
     src/parser.cpp
-    src/vm.cpp
     src/codegen.cpp
     src/semantics.cpp
+    src/main.cpp
 )
 
-add_executable(dragon src/main.cpp ${SOURCES})
+add_executable(dragon ${SOURCES})
diff --git a/README.md b/README.md
index d929b92..5bb432f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Dragon
+# Dragonfly Compiler
 
 A high-level multi-paradigm programming language.
 ```
diff --git a/examples/Common Programming Concepts/Operators.drg b/examples/Common Programming Concepts/Operators.drg
index 2301d61..916099c 100644
--- a/examples/Common Programming Concepts/Operators.drg
+++ b/examples/Common Programming Concepts/Operators.drg
@@ -8,6 +8,5 @@
 
 & | ^
 > >= <= <=
 >> <<
-~ ? :
 */
\ No newline at end of file
diff --git a/include/dragon/lexer.h b/include/dragon/lexer.h
index 6eca3de..159acd8 100644
--- a/include/dragon/lexer.h
+++ b/include/dragon/lexer.h
@@ -1,24 +1,38 @@
-#ifndef LEXER_H
-#define LEXER_H
+#pragma once
+
+#include <cstddef>
+#include <optional>
+#include <string>
+#include <vector>
 
 #include "token.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-typedef struct {
-    char* source;
-    size_t position;
-} Lexer;
-
-Lexer* create_lexer(const char* source);
-Token lex_number(Lexer* lexer);
-Token lex_identifier(Lexer* lexer);
-Token lex_symbol(Lexer* lexer);
-Token lex_string(Lexer* lexer);
-Token is_keyword(Token token);
-TokenList* tokenise(const char* source);
-void free_lexer(Lexer* lexer);
-
-#endif // LEXER_H
\ No newline at end of file
+
+class Lexer {
+public:
+    Lexer() = default;
+    Lexer(std::string input);
+
+    std::vector<Token> lex(std::string input);
+    std::vector<Token> lex();
+
+    void reset();
+
+    Token lex_identifier();
+    Token lex_number();
+    Token lex_string();
+    Token lex_symbol();
+    Token lex_single_line_comment();
+    Token lex_multi_line_comment();
+
+    TokenType get_keyword(std::string input);
+
+private:
+    std::vector<Token> tokens;
+    std::string input;
+    size_t index = 0;
+    size_t line = 1;
+    size_t column = 1;
+
+    std::optional<char> peek() const;
+    std::optional<char> peek_next() const;
+    std::optional<char> advance();
+};
\ No newline at end of file
diff --git a/include/dragon/token.h b/include/dragon/token.h
index 6f624a9..36c4b92 100644
--- a/include/dragon/token.h
+++ b/include/dragon/token.h
@@ -1,130 +1,164 @@
-#ifndef TOKEN_H
-#define TOKEN_H
+#pragma once
 
-#include <stddef.h>
+#include <sstream>
+#include <string>
+#include <vector>
 
-typedef enum {
+enum class TokenType {
     // Keywords
-    TOKEN_LET_KW,      // "let"
-    TOKEN_MUT_KW,      // "mut"
-    TOKEN_INT_KW,      // "int"
-    TOKEN_IF_KW,       // "if"
-    TOKEN_ELSE_KW,     // "else"
-    TOKEN_FOR_KW,      // "for"
-    TOKEN_IN_KW,       // "in"
-    TOKEN_FUNC_KW,     // "func"
-    TOKEN_RETURN_KW,   // "return"
-    TOKEN_WHILE_KW,    // "while"
-    TOKEN_TRUE_KW,     // "true"
-    TOKEN_FALSE_KW,    // "false"
-    TOKEN_BOOL_KW,     // "bool"
-    TOKEN_BREAK_KW,    // "break"
-    TOKEN_CONTINUE_KW, // "continue"
-    TOKEN_STRUCT_KW,   // "struct"
-    TOKEN_ENUM_KW,     // "enum"
-    TOKEN_TYPE_KW,     // "type"
-    TOKEN_MATCH_KW,    // "match"
-    TOKEN_IMPORT_KW,   // "import"
-    TOKEN_AS_KW,       // "as"
+    Let,
+    Mut,
+    If,
+    Else,
+    While,
+    For,
+    In,
+    True,
+    False,
 
     // Literals
-    TOKEN_INTEGER,     // 123
-    TOKEN_FLOAT,       // 123.45
-    TOKEN_IDENTIFIER,  // variable_name
-    TOKEN_STRING,      // "string"
-    TOKEN_CHAR,        // 'c'
+    IntegerLiteral,
+    StringLiteral,
+    Identifier,
 
     // Symbols
-    TOKEN_EQUALS,      // =
-    TOKEN_PLUS,        // +
-    TOKEN_MINUS,       // -
-    TOKEN_ASTERISK,    // *
-    TOKEN_SLASH,       // /
-    TOKEN_MODULO,      // %
-    TOKEN_AND,         // &&
-    TOKEN_OR,          // ||
-    TOKEN_NOT,         // !
-    TOKEN_EQUALITY,    // ==
-    TOKEN_NOT_EQ,      // !=
-    TOKEN_GRT,         // >
-    TOKEN_LSS,         // <
-    TOKEN_LTE,         // <=
-    TOKEN_GTE,         // >=
-    TOKEN_LSHIFT,      // <<
-    TOKEN_RSHIFT,      // >>
-    TOKEN_AMPERSAND,   // &
-    TOKEN_PIPE,        // |
-    TOKEN_CARET,       // ^
-    TOKEN_TILDE,       // ~
-    TOKEN_BRACE_OPEN,  // {
-    TOKEN_BRACE_CLOSE, // }
-    TOKEN_PAREN_OPEN,  // (
-    TOKEN_PAREN_CLOSE, // )
-    TOKEN_COMMA,       // ,
-    TOKEN_SEMICOLON,   // ;
-    TOKEN_COLON,       // :
-    TOKEN_DOT,         // .
-    TOKEN_RANGE,       // ..
-    TOKEN_DOUBLE_RIGHT_ARROW, // =>
-    TOKEN_RIGHT_ARROW, // ->
-
-    // Misc
-    TOKEN_COMMENT,     // Comment
-    TOKEN_EOF,         // End of file
-    TOKEN_INVALID      // Invalid token
-} TokenType;
-
-static const char* keywords[] = {
-    // Variable Declarations
-    "let",
-    "mut",
-
-    // Data Types
-    "int",
-    "float",
-    "bool",
-    "char",
-
-    // Control Flow
-    "if",
-    "else",
-    "for",
-    "in",
-    "while",
-    "break",
-    "continue",
-
-    // Boolean Literals
-    "true",
-    "false",
-
-    // Functions
-    "func",
-    "return",
-
-    // Modules and Types
-    "import",
-    "struct",
-    "enum",
-    "type",
-    "match",
-    "as"
+    Plus,                 // +
+    Minus,                // -
+    Star,                 // *
+    Slash,                // /
+    And,                  // &&
+    Or,                   // ||
+    Not,                  // !
+    Equals,               // ==
+    NotEquals,            // !=
+    LessThan,             // <
+    GreaterThan,          // >
+    LessThanOrEqualTo,    // <=
+    GreaterThanOrEqualTo, // >=
+    Assign,               // =
+    LeftParen,            // (
+    RightParen,           // )
+    LeftBrace,            // {
+    RightBrace,           // }
+    LeftBracket,          // [
+    RightBracket,         // ]
+    Comma,                // ,
+    Dot,                  // .
+    Range,                // ..
+    Ampersand,            // &
+    Pipe,                 // |
+    Caret,                // ^
+    Tilde,                // ~
+
+    // Misc
+    Comment,
+    Unknown,
 };
-
-typedef struct {
+
+inline std::string token_type_to_string(TokenType type) {
+    switch (type) {
+        case TokenType::Let: return "Let";
+        case TokenType::Mut: return "Mut";
+        case TokenType::If: return "If";
+        case TokenType::Else: return "Else";
+        case TokenType::While: return "While";
+        case TokenType::For: return "For";
+        case TokenType::In: return "In";
+        case TokenType::True: return "True";
+        case TokenType::False: return "False";
+        case TokenType::IntegerLiteral: return "IntegerLiteral";
+        case TokenType::StringLiteral: return "StringLiteral";
+        case TokenType::Identifier: return "Identifier";
+        case TokenType::Plus: return "Plus";
+        case TokenType::Minus: return "Minus";
+        case TokenType::Star: return "Star";
+        case TokenType::Slash: return "Slash";
+        case TokenType::And: return "And";
+        case TokenType::Or: return "Or";
+        case TokenType::Not: return "Not";
+        case TokenType::Equals: return "Equals";
+        case TokenType::NotEquals: return "NotEquals";
+        case TokenType::LessThan: return "LessThan";
+        case TokenType::GreaterThan: return "GreaterThan";
+        case TokenType::LessThanOrEqualTo: return "LessThanOrEqualTo";
+        case TokenType::GreaterThanOrEqualTo: return "GreaterThanOrEqualTo";
+        case TokenType::Assign: return "Assign";
+        case TokenType::LeftParen: return "LeftParen";
+        case TokenType::RightParen: return "RightParen";
+        case TokenType::LeftBrace: return "LeftBrace";
+        case TokenType::RightBrace: return "RightBrace";
+        case TokenType::LeftBracket: return "LeftBracket";
+        case TokenType::RightBracket: return "RightBracket";
+        case TokenType::Comma: return "Comma";
+        case TokenType::Dot: return "Dot";
+        case TokenType::Range: return "Range";
+        case TokenType::Ampersand: return "Ampersand";
+        case TokenType::Pipe: return "Pipe";
+        case TokenType::Caret: return "Caret";
+        case TokenType::Tilde: return "Tilde";
+        case TokenType::Comment: return "Comment";
+        case TokenType::Unknown: return "Unknown";
+        default: return "Unknown";
+    }
+}
+
+class Token {
+public:
     TokenType type;
-    char* value;
-} Token;
-
-typedef struct {
-    Token* tokens;
-    size_t count;
-    size_t capacity;
-} TokenList;
-
-TokenList* create_token_list();
-void append_token(TokenList* list, Token token);
-void free_tokens(TokenList* list);
+    std::string value;
+    size_t line;
+    size_t column;
+
+    Token(TokenType type, std::string value, size_t line, size_t column) {
+        this->type = type;
+        this->value = value;
+        this->line = line;
+        this->column = column;
+    }
+
+    Token(TokenType type, std::string value) {
+        this->type = type;
+        this->value = value;
+        this->line = 0;
+        this->column = 0;
+    }
+
+    Token(TokenType type) {
+        this->type = type;
+        this->value = "";
+        this->line = 0;
+        this->column = 0;
+    }
+
+    // Line/column are deliberately ignored here so tests can compare
+    // tokens structurally.
+    inline bool operator==(const Token& other) const {
+        return this->type == other.type && this->value == other.value;
+    }
+
+    inline bool operator!=(const Token& other) const {
+        return this->type != other.type || this->value != other.value;
+    }
+
+    inline std::string to_string() {
+        std::stringstream ss;
+        ss << "Token(" << token_type_to_string(this->type);
+        if (!this->value.empty()) {
+            ss << ", " << this->value;
+        }
+        if (this->line != 0 || this->column != 0) {
+            ss << ", " << this->line << ", " << this->column;
+        }
+        ss << ")";
+        return ss.str();
+    }
+};
 
-#endif
\ No newline at end of file
+inline std::string token_vector_to_string(std::vector<Token> tokens) {
+    std::stringstream ss;
+    ss << "[";
+    for (size_t i = 0; i < tokens.size(); i++) {
+        ss << tokens[i].to_string();
+        if (i < tokens.size() - 1) {
+            ss << ", ";
+        }
+    }
+    ss << "]";
+    return ss.str();
+}
\ No newline at end of file
diff --git a/include/dragon/vm.h b/include/dragon/vm.h
deleted file mode 100644
index e69de29..0000000
diff --git a/src/lexer.cpp b/src/lexer.cpp
index d2a3fcf..860da36 100644
--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@@ -1,177 +1,355 @@
 #include "dragon/lexer.h"
 
-Lexer* create_lexer(const char* source) {
-    Lexer* lexer = (Lexer *)malloc(sizeof(Lexer));
-    lexer->source = strdup(source);
-    lexer->position = 0;
-    return lexer;
+#include <cctype>
+
+Lexer::Lexer(std::string input) {
+    this->input = input;
 }
 
-void free_lexer(Lexer* lexer) {
-    free(lexer->source);
-    free(lexer);
+std::vector<Token> Lexer::lex(std::string input) {
+    if (!this->input.empty()) this->reset();
+    this->input = input;
+    return this->lex();
 }
 
-TokenList* tokenise(const char* source) {
-    TokenList* tokens = create_token_list();
+void Lexer::reset() {
+    this->tokens.clear();
+    this->input = "";
+    this->index = 0;
+    this->line = 1;
+    this->column = 1;
+}
 
-    if (source == NULL || strlen(source) == 0) {
-        return tokens;
-    }
+std::vector<Token> Lexer::lex() {
+    while (this->index < this->input.size()) {
+        auto opt_c = this->peek();
+        if (!opt_c.has_value()) break;
+        char c = opt_c.value();
 
-    Lexer* lexer = create_lexer(source);
+        if (std::isspace(c)) {
+            this->advance();
+            continue;
+        }
 
-    while (lexer->position < strlen(lexer->source)) {
-        char c = lexer->source[lexer->position];
+        if (std::isalpha(c) || c == '_') {
+            this->tokens.push_back(this->lex_identifier());
+            continue;
+        }
 
-        while (c == ' ' || c == '\n' || c == '\t') {
-            lexer->position++;
-            c = lexer->source[lexer->position];
+        if (std::isdigit(c)) {
+            this->tokens.push_back(this->lex_number());
+            continue;
        }
 
-        Token token = {TOKEN_INVALID, NULL};
-        if (isdigit(c)) {
-            token = lex_number(lexer);
-        } else if (isalpha(c)) {
-            token = lex_identifier(lexer);
-        } else if (c == '"' || c == '\'') {
-            token = lex_string(lexer);
-        } else {
-            token = lex_symbol(lexer);
+        if (c == '"') {
+            this->tokens.push_back(this->lex_string());
+            continue;
        }
 
-        append_token(tokens, token);
-    }
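+        // A '/' may begin a comment; look ahead before falling back to
+        // symbol lexing, where a lone '/' becomes Slash.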
+        if (c == '/' && this->peek_next() == '/') {
+            this->tokens.push_back(this->lex_single_line_comment());
+            continue;
+        }
+
+        if (c == '/' && this->peek_next() == '*') {
+            this->tokens.push_back(this->lex_multi_line_comment());
+            continue;
+        }
 
-    free_lexer(lexer);
+        this->tokens.push_back(this->lex_symbol());
+    }
 
-    return tokens;
+    return this->tokens;
 }
 
-Token lex_number(Lexer* lexer) {
-    size_t start = lexer->position;
-    while (isdigit(lexer->source[lexer->position])) {
-        lexer->position++;
-    }
+Token Lexer::lex_identifier() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
 
-    size_t length = lexer->position - start;
-    char* value = strndup(lexer->source + start, length);
+    while (true) {
+        auto opt_c = this->peek();
+        if (!opt_c.has_value() || !(std::isalnum(opt_c.value()) || opt_c.value() == '_')) {
+            break;
+        }
+        value += this->advance().value();
+    }
 
-    Token token = {TOKEN_INTEGER, value};
-    return token;
+    TokenType type = this->get_keyword(value);
+    return Token(type, value, line, column);
 }
 
-Token lex_identifier(Lexer* lexer) {
-    size_t start = lexer->position;
-    while (isalnum(lexer->source[lexer->position])) {
-        lexer->position++;
+Token Lexer::lex_number() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
+
+    while (true) {
+        auto opt_c = this->peek();
+        if (opt_c.has_value() && opt_c.value() == '_') {
+            this->advance();
+            continue;
+        }
+        if (!opt_c.has_value() || !std::isdigit(opt_c.value())) {
+            break;
+        }
+        value += this->advance().value();
    }
 
-    size_t length = lexer->position - start;
-    char* value = strndup(lexer->source + start, length);
+    return Token(TokenType::IntegerLiteral, value, line, column);
+}
+
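+// Lexes a double-quoted string literal: the surrounding quotes are
+// dropped and the escapes \n, \t, \r, \\ and \" are translated into
+// their character values.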
+Token Lexer::lex_string() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
 
-    Token token = {TOKEN_IDENTIFIER, value};
+    this->advance(); // Skip the opening quote
 
-    token = is_keyword(token);
+    while (true) {
+        auto opt_c = this->peek();
+        if (!opt_c.has_value() || opt_c.value() == '"') {
+            break;
+        }
 
-    return token;
+        if (opt_c.value() == '\\') {
+            this->advance(); // Skip the backslash
+            auto escaped_char = this->advance();
+            if (escaped_char.has_value()) {
+                switch (escaped_char.value()) {
+                    case 'n': value += '\n'; break;
+                    case 't': value += '\t'; break;
+                    case 'r': value += '\r'; break;
+                    case '\\': value += '\\'; break;
+                    case '"': value += '"'; break;
+                    default: value += '\\'; value += escaped_char.value(); break;
+                }
+            }
+        } else {
+            value += this->advance().value();
+        }
+    }
+
+    this->advance(); // Skip the closing quote
+
+    return Token(TokenType::StringLiteral, value, line, column);
 }
 
-Token lex_symbol(Lexer* lexer) {
-    char c = lexer->source[lexer->position];
-    TokenType type = TOKEN_INVALID;
-    char* value = NULL;
+Token Lexer::lex_symbol() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
+
+    auto opt_c = this->peek();
+    if (!opt_c.has_value()) {
+        return Token(TokenType::Unknown, value, line, column);
+    }
+    char c = opt_c.value();
 
     switch (c) {
-    case '=':
-        type = TOKEN_EQUALS;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
     case '+':
-        type = TOKEN_PLUS;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
-    case '{':
-        type = TOKEN_BRACE_OPEN;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
-    case '}':
-        type = TOKEN_BRACE_CLOSE;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
+        value += this->advance().value();
+        return Token(TokenType::Plus, value, line, column);
+    case '-':
+        value += this->advance().value();
+        return Token(TokenType::Minus, value, line, column);
+    case '*':
+        value += this->advance().value();
+        return Token(TokenType::Star, value, line, column);
+    case '/':
+        value += this->advance().value();
+        return Token(TokenType::Slash, value, line, column);
+    case '!':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '=') {
+            value += this->advance().value();
+            return Token(TokenType::NotEquals, value, line, column);
+        }
+        return Token(TokenType::Not, value, line, column);
+    case '=':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '=') {
+            value += this->advance().value();
+            return Token(TokenType::Equals, value, line, column);
+        }
+        return Token(TokenType::Assign, value, line, column);
+    case '<':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '=') {
+            value += this->advance().value();
+            return Token(TokenType::LessThanOrEqualTo, value, line, column);
+        }
+        return Token(TokenType::LessThan, value, line, column);
+    case '>':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '=') {
+            value += this->advance().value();
+            return Token(TokenType::GreaterThanOrEqualTo, value, line, column);
+        }
+        return Token(TokenType::GreaterThan, value, line, column);
+    case '&':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '&') {
+            value += this->advance().value();
+            return Token(TokenType::And, value, line, column);
+        }
+        return Token(TokenType::Ampersand, value, line, column);
+    case '|':
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '|') {
+            value += this->advance().value();
+            return Token(TokenType::Or, value, line, column);
+        }
+        return Token(TokenType::Pipe, value, line, column);
+    case '^':
+        value += this->advance().value();
+        return Token(TokenType::Caret, value, line, column);
+    case '~':
+        value += this->advance().value();
+        return Token(TokenType::Tilde, value, line, column);
     case '(':
-        type = TOKEN_PAREN_OPEN;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
+        value += this->advance().value();
+        return Token(TokenType::LeftParen, value, line, column);
     case ')':
-        type = TOKEN_PAREN_CLOSE;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
+        value += this->advance().value();
+        return Token(TokenType::RightParen, value, line, column);
+    case '{':
+        value += this->advance().value();
+        return Token(TokenType::LeftBrace, value, line, column);
+    case '}':
+        value += this->advance().value();
+        return Token(TokenType::RightBrace, value, line, column);
+    case '[':
+        value += this->advance().value();
+        return Token(TokenType::LeftBracket, value, line, column);
+    case ']':
+        value += this->advance().value();
+        return Token(TokenType::RightBracket, value, line, column);
     case ',':
-        type = TOKEN_COMMA;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
-    case '>':
-        type = TOKEN_GRT;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
-    case '-':
-        if (lexer->source[lexer->position + 1] == '>') {
-            type = TOKEN_RIGHT_ARROW;
-            value = strndup(lexer->source + lexer->position, 2);
-            lexer->position++;
-        } else {
-            type = TOKEN_INVALID;
-            value = strndup(lexer->source + lexer->position, 1);
-        }
-        break;
+        value += this->advance().value();
+        return Token(TokenType::Comma, value, line, column);
     case '.':
-        printf("Next char: %c\n", lexer->source[lexer->position + 1]);
-        if (lexer->source[lexer->position + 1] == '.') {
-            type = TOKEN_RANGE;
-            value = strndup(lexer->source + lexer->position, 2);
-            lexer->position++;
-        } else {
-            type = TOKEN_INVALID;
-            value = strndup(lexer->source + lexer->position, 1);
-        }
-        break;
+        value += this->advance().value();
+        if (auto next = this->peek(); next.has_value() && next.value() == '.') {
+            value += this->advance().value();
+            return Token(TokenType::Range, value, line, column);
+        }
+        return Token(TokenType::Dot, value, line, column);
     default:
-        type = TOKEN_INVALID;
-        value = strndup(lexer->source + lexer->position, 1);
-        break;
+        value += this->advance().value();
+        return Token(TokenType::Unknown, value, line, column);
     }
-
-    lexer->position++;
-
-    Token token = {type, value};
-    return token;
 }
+
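+// Single-line comments run to (but do not include) the newline; the
+// leading "//" is kept in the token value.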
+Token Lexer::lex_single_line_comment() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
+
+    while (true) {
+        auto opt_c = this->peek();
+        if (!opt_c.has_value() || opt_c.value() == '\n') {
+            break;
+        }
+        value += this->advance().value();
+    }
 
-Token lex_string(Lexer* lexer) {
-    char quote = lexer->source[lexer->position];
-    lexer->position++;
+    return Token(TokenType::Comment, value, line, column);
+}
 
-    size_t start = lexer->position;
-    while (lexer->source[lexer->position] != quote) {
-        lexer->position++;
-    }
+Token Lexer::lex_multi_line_comment() {
+    std::string value = "";
+    size_t line = this->line;
+    size_t column = this->column;
+
+    while (true) {
+        auto opt_c = this->peek();
+        auto opt_next_c = this->peek_next();
+        if (!opt_c.has_value() || !opt_next_c.has_value()) {
+            break;
+        }
+        if (opt_c.value() == '*' && opt_next_c.value() == '/') {
+            value += this->advance().value();
+            value += this->advance().value();
+            break;
+        }
+        value += this->advance().value();
+    }
 
-    size_t length = lexer->position - start;
-    char* value = strndup(lexer->source + start, length);
+    return Token(TokenType::Comment, value, line, column);
+}
 
-    lexer->position++;
+TokenType Lexer::get_keyword(std::string value) {
+    if (value == "let") {
+        return TokenType::Let;
+    }
+
+    if (value == "mut") {
+        return TokenType::Mut;
+    }
+
+    if (value == "if") {
+        return TokenType::If;
+    }
+
+    if (value == "else") {
+        return TokenType::Else;
+    }
+
+    if (value == "while") {
+        return TokenType::While;
+    }
 
-    Token token = {TOKEN_STRING, value};
-    return token;
-}
+    if (value == "for") {
+        return TokenType::For;
+    }
 
-Token is_keyword(Token token) {
-    for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
-        if (strcmp(token.value, keywords[i]) == 0) {
-            token.type = (TokenType)i;
-            return token;
-        }
+    if (value == "in") {
+        return TokenType::In;
+    }
+
+    if (value == "true") {
+        return TokenType::True;
+    }
+
+    if (value == "false") {
+        return TokenType::False;
+    }
+
+    return TokenType::Identifier;
+}
+
+std::optional<char> Lexer::peek() const {
+    if (this->index < this->input.size()) {
+        return this->input[this->index];
    }
-    return token;
+    return std::nullopt;
 }
+
+std::optional<char> Lexer::peek_next() const {
+    if (this->index + 1 < this->input.size()) {
+        return this->input[this->index + 1];
+    }
+    return std::nullopt;
+}
+
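+// Consumes one character while keeping the 1-based line and column
+// counters in sync; a newline bumps the line and resets the column.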
+std::optional<char> Lexer::advance() {
+    if (this->index < this->input.size()) {
+        char c = this->input[this->index];
+        this->index++;
+        this->column++;
+
+        if (c == '\n') {
+            this->line++;
+            this->column = 1;
+        }
+
+        return c;
+    }
+    return std::nullopt;
+}
\ No newline at end of file
diff --git a/src/token.cpp b/src/token.cpp
deleted file mode 100644
index b152832..0000000
--- a/src/token.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "dragon/token.h"
-#include <stdlib.h>
-#include <stddef.h>
-
-TokenList* create_token_list() {
-    TokenList* list = (TokenList*)malloc(sizeof(TokenList));
-    list->tokens = (Token*)malloc(sizeof(Token) * 8);
-    list->count = 0;
-    list->capacity = 8;
-    return list;
-}
-
-void append_token(TokenList* list, Token token) {
-    if (list->count >= list->capacity) {
-        list->capacity *= 2;
-        list->tokens = (Token*)realloc(list->tokens, sizeof(Token) * list->capacity);
-    }
-    list->tokens[list->count++] = token;
-}
-
-void free_tokens(TokenList* list) {
-    for (size_t i = 0; i < list->count; i++) {
-        free(list->tokens[i].value);
-    }
-    free(list->tokens);
-    free(list);
-}
diff --git a/src/vm.cpp b/src/vm.cpp
deleted file mode 100644
index cdce42c..0000000
--- a/src/vm.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "dragon/vm.h"
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 82983d9..4a1be1a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,13 +1,12 @@
-add_executable(run_tests lexer_test.cpp parser_test.cpp vm_test.cpp)
+add_executable(run_tests lexer_test.cpp parser_test.cpp semantics_test.cpp)
 
 target_link_libraries(run_tests gtest gtest_main pthread)
 target_include_directories(run_tests PRIVATE ../include)
 
 target_sources(run_tests PRIVATE
     ../src/lexer.cpp
-    ../src/token.cpp
     ../src/parser.cpp
-    ../src/vm.cpp
+    ../src/semantics.cpp
 )
 
 include(GoogleTest)
diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp
index b4e541e..39be1e6 100644
--- a/tests/lexer_test.cpp
+++ b/tests/lexer_test.cpp
@@ -1,521 +1,854 @@
 #include <gtest/gtest.h>
-
-
+#include <cstddef>
+#include <iostream>
+#include <string>
+#include <vector>
 #include "dragon/lexer.h"
 #include "dragon/token.h"
 
+// <identifier>
+TEST(LexerTests, Identifier) {
+    const std::vector<std::string> input = {
+        "x",
+        "x_y123",
+        "reallyLongVariableNameWithNoNumbersOrUnderscores",
+        "U_ND_ER_SCO_RES",
+        "____starting___with__underscore",
+    };
+    std::vector<Token> validTokens = {
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Identifier, "x_y123"),
+        Token(TokenType::Identifier, "reallyLongVariableNameWithNoNumbersOrUnderscores"),
+        Token(TokenType::Identifier, "U_ND_ER_SCO_RES"),
+        Token(TokenType::Identifier, "____starting___with__underscore"),
+    };
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++) {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(tokens.size(), 1)
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of 1)";
+
+        ASSERT_EQ(validTokens[i], tokens[0])
+            << "Failed on input: " << input[i]
+            << " ( received: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")";
+    }
+}
 
-TEST(LexerTests, TokeniseVariableDeclarations) {
-    const char* input = R"(
-        let variable324534 int = 42
-        let mut t_t_3434_t_golden bool = true
-        let bacon_wrapped_4_shrimp float = 3.14
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 15);
-
-    int i = 0;
-
-    // let x int = 42
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 42
-
-    // let mut y bool = true
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW);     // mut
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BOOL_KW);    // bool
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_TRUE_KW);    // true
-
-    // let z float = 3.14
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // z
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);      // 3.14
-
-    free_tokens(tokens);
-}
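+// Keywords are recognised after an identifier-shaped lexeme has been
+// read: get_keyword() maps the spelling to a keyword type, otherwise
+// the token stays an Identifier.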
+// <keyword>
+TEST(LexerTests, Keywords) {
+    const std::vector<std::string> input = {
+        "let",
+        "mut",
+        "if",
+        "else",
+        "for",
+        "in",
+        "true",
+        "false",
+        "while",
+    };
+    std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Mut, "mut"),
+        Token(TokenType::If, "if"),
+        Token(TokenType::Else, "else"),
+        Token(TokenType::For, "for"),
+        Token(TokenType::In, "in"),
+        Token(TokenType::True, "true"),
+        Token(TokenType::False, "false"),
+        Token(TokenType::While, "while"),
+    };
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++) {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(tokens.size(), 1)
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of 1)";
+
+        ASSERT_EQ(validTokens[i], tokens[0])
+            << "Failed on input: " << input[i]
+            << " ( received: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")";
+    }
+}
 
-TEST(LexerTests, TokeniseVariableAssignment) {
-    const char* input = "name = \"Alice and the frog\"";
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 5);
-
-    int i = 0;
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);     // "="
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // "42"
-
-    free_tokens(tokens);
-}
 
+// <string-literal>
+TEST(LexerTests, StringLiterals) {
+    const std::vector<std::string> input = {
+        "\"Enter username: \"",
+        "\"This is a string with escape characters \\\" \\n \\t \"",
+        "\"Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|\""
+    };
+    std::vector<Token> validTokens = {
+        Token(TokenType::StringLiteral, "Enter username: "),
+        Token(TokenType::StringLiteral, "This is a string with escape characters \" \n \t "),
+        Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|")};
+
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++) {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(tokens.size(), 1)
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of 1)";
+
+        ASSERT_EQ(validTokens[i], tokens[0])
+            << "Failed on input: " << input[i]
+            << " ( received: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")";
+    }
+}
 
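+// '_' may appear inside an integer literal as a digit separator; the
+// lexer drops it, so "1_000_000" lexes to the value "1000000".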
+ << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; + } } -TEST(LexerTests, TokeniseRelationalOperators) { - const char* input = "20 > x > 10 < 1000 <= 34_540 >= 0"; - TokenList* tokens = tokenise(input); +// Test for all symbols +TEST(LexerTests, Symbols) { + const std::vector input = { + "+", + "-", + "*", + "/", + "=", + "==", + "!=", + "<", + ">", + "<=", + ">=", + "&&", + "||", + "!", + "&", + "|", + "^", + "~"}; + std::vector validTokens = { + Token(TokenType::Plus, "+"), + Token(TokenType::Minus, "-"), + Token(TokenType::Star, "*"), + Token(TokenType::Slash, "/"), + Token(TokenType::Assign, "="), + Token(TokenType::Equals, "=="), + Token(TokenType::NotEquals, "!="), + Token(TokenType::LessThan, "<"), + Token(TokenType::GreaterThan, ">"), + Token(TokenType::LessThanOrEqualTo, "<="), + Token(TokenType::GreaterThanOrEqualTo, ">="), + Token(TokenType::And, "&&"), + Token(TokenType::Or, "||"), + Token(TokenType::Not, "!"), + Token(TokenType::Ampersand, "&"), + Token(TokenType::Pipe, "|"), + Token(TokenType::Caret, "^"), + Token(TokenType::Tilde, "~")}; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; + } +} - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 6); +// +// Test for single-line comments +TEST(LexerTests, SingleLineComments) { + const std::vector input = { + "// This is a comment"}; + std::vector validTokens = { + Token(TokenType::Comment, "// This is a comment")}; - int i = 0; + Lexer lexer; - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "20" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // ">" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // ">" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "10" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LSS); // "<" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "1000" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GTE); // ">=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "34540" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LTE); // "<=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "0" + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); - free_tokens(tokens); + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; + } } -TEST(LexerTests, TokeniseBooleanOperators) { - const char* input = "false || true && !!x"; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 7); +// +// Test for multi-line comments +TEST(LexerTests, MultiLineComments) { + const std::vector input = { + "/* This is a multi-line comment */"}; + std::vector validTokens = { + 
+// <comment>
+// Test for multi-line comments
+TEST(LexerTests, MultiLineComments) {
+    const std::vector<std::string> input = {
+        "/* This is a multi-line comment */"};
+    std::vector<Token> validTokens = {
+        Token(TokenType::Comment, "/* This is a multi-line comment */")};
+
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++) {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(tokens.size(), 1)
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of 1)";
+
+        ASSERT_EQ(validTokens[i], tokens[0])
+            << "Failed on input: " << input[i]
+            << " ( received: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")";
+    }
+}
 
-TEST(LexerTests, TokeniseElseIf) {
-    const char* input = R"(
-        if x > 10 {
-            return
-        } else if x < 5 {
-            break
-        }
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 16);
-
-    int i = 0;
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW);      // "if"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);        // ">"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // "10"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);  // "return"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW);    // "else"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW);      // "if"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LTE);        // "<"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // "5"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BREAK_KW);   // "break"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    free_tokens(tokens);
-}
 
+// <expr> <arithmetic-op> <expr>
+// Arithmetic
+TEST(LexerTests, ArithmeticExpression) {
+    const std::vector<std::string> input = {
+        "1 + 2",
+        "1 - 2",
+        "1 * 2",
+        "1 / 2"};
+    const std::vector<std::vector<Token>> validTokens = {
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Plus, "+"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Minus, "-"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Star, "*"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Slash, "/"),
+         Token(TokenType::IntegerLiteral, "2")}};
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++)
+    {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(validTokens[i].size(), tokens.size())
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")";
+
+        for (size_t j = 0; j < tokens.size(); j++)
+        {
+            ASSERT_EQ(validTokens[i][j], tokens[j])
+                << "Failed on input: " << input[i]
+                << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")";
+        }
+    }
+}
 
+// <expr> <boolean-op> <expr>
+// Boolean
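+// "&&" and "||" are matched before the single-character '&' and '|'
+// bitwise tokens in lex_symbol().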
+TEST(LexerTests, BooleanExpression) {
+    const std::vector<std::string> input = {
+        "true && false",
+        "true || false",
+        "!true",
+        "true == false",
+        "true != false"};
+    const std::vector<std::vector<Token>> validTokens = {
+        {Token(TokenType::True, "true"),
+         Token(TokenType::And, "&&"),
+         Token(TokenType::False, "false")},
+        {Token(TokenType::True, "true"),
+         Token(TokenType::Or, "||"),
+         Token(TokenType::False, "false")},
+        {Token(TokenType::Not, "!"),
+         Token(TokenType::True, "true")},
+        {Token(TokenType::True, "true"),
+         Token(TokenType::Equals, "=="),
+         Token(TokenType::False, "false")},
+        {Token(TokenType::True, "true"),
+         Token(TokenType::NotEquals, "!="),
+         Token(TokenType::False, "false")}};
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++)
+    {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(validTokens[i].size(), tokens.size())
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")";
+
+        for (size_t j = 0; j < tokens.size(); j++)
+        {
+            ASSERT_EQ(validTokens[i][j], tokens[j])
+                << "Failed on input: " << input[i]
+                << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")";
+        }
+    }
+}
 
-TEST(LexerTests, TokeniseWhileLoop) {
-    const char* input = R"(
-        while y {
-            y = false || false
-        }
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 10);
-
-    int i = 0;
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_WHILE_KW);   // while
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);     // =
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);   // false
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR);         // ||
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);   // false
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    free_tokens(tokens);
-}
 
+// <expr> <relational-op> <expr>
+// Relational
+TEST(LexerTests, RelationalExpression) {
+    const std::vector<std::string> input = {
+        "1 < 2",
+        "1 > 2",
+        "1 <= 2",
+        "1 >= 2"};
+    const std::vector<std::vector<Token>> validTokens = {
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::LessThan, "<"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::GreaterThan, ">"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::LessThanOrEqualTo, "<="),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::GreaterThanOrEqualTo, ">="),
+         Token(TokenType::IntegerLiteral, "2")}};
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++)
+    {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(validTokens[i].size(), tokens.size())
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")";
+
+        for (size_t j = 0; j < tokens.size(); j++)
+        {
+            ASSERT_EQ(validTokens[i][j], tokens[j])
+                << "Failed on input: " << input[i]
+                << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")";
+        }
+    }
+}
 
-TEST(LexerTests, TokeniseConditionalForLoop) {
-    const char* input = R"(
-        if 20 > x > 10 {
-            return
-        } else {
-            for i in 1..10 {
-                if i == 5 && !!y {
-                    break
-                }
-                continue
-            }
-        }
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 36);
-
-    int i = 0;
-
-    // if 20 > x > 10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 20
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);        // >
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);        // >
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 10
-
-    // { return }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // else { for i in 1..10 { ... } }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FOR_KW);     // for
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // i
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IN_KW);      // in
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 1
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RANGE);      // ..
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 10
-
-    // { if i == 5 && !!y { break } continue }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW);      // if
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // i
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALITY);   // ==
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 5
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_AND);        // &&
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT);        // !
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT);        // !
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BREAK_KW);   // break
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_CONTINUE_KW); // continue
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    free_tokens(tokens);
-}
 
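+// A lone '&' or '|' (not doubled) lexes as the bitwise Ampersand/Pipe
+// token rather than And/Or.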
+// <expr> <bitwise-op> <expr>
+// bitwise
+TEST(LexerTests, BitwiseExpression) {
+    const std::vector<std::string> input = {
+        "1 & 2",
+        "1 | 2",
+        "1 ^ 2",
+        "~1"};
+    const std::vector<std::vector<Token>> validTokens = {
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Ampersand, "&"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Pipe, "|"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::IntegerLiteral, "1"),
+         Token(TokenType::Caret, "^"),
+         Token(TokenType::IntegerLiteral, "2")},
+        {Token(TokenType::Tilde, "~"),
+         Token(TokenType::IntegerLiteral, "1")}};
+    Lexer lexer;
+
+    for (size_t i = 0; i < input.size(); i++)
+    {
+        std::vector<Token> tokens = lexer.lex(input[i]);
+
+        ASSERT_EQ(validTokens[i].size(), tokens.size())
+            << "Failed on input: " << input[i]
+            << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")";
+
+        for (size_t j = 0; j < tokens.size(); j++)
+        {
+            ASSERT_EQ(validTokens[i][j], tokens[j])
+                << "Failed on input: " << input[i]
+                << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")";
+        }
+    }
+}
 
-TEST(LexerTests, TokeniseFunctionDefinition) {
-    const char* input = R"(
-        func add(a int, b int) -> int {
-            return a + b
-        }
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 17);
-
-    int i = 0;
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FUNC_KW);    // func
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // add
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RIGHT_ARROW); // ->
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);  // return
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS);       // +
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    free_tokens(tokens);
-}
 
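+// The mixed expression below also exercises maximal munch: "<=" must
+// lex as one token, never '<' followed by '='.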
+// <expr>
+// Mixed
+TEST(LexerTests, MixedExpression) {
+    const std::string input = "1 + 2 * 3 / 4 - 5 == !true && 7 < 8 || 9 > 10 && 11 <= 12 | 13 & 14 ^ 15";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2"),
+        Token(TokenType::Star, "*"),
+        Token(TokenType::IntegerLiteral, "3"),
+        Token(TokenType::Slash, "/"),
+        Token(TokenType::IntegerLiteral, "4"),
+        Token(TokenType::Minus, "-"),
+        Token(TokenType::IntegerLiteral, "5"),
+        Token(TokenType::Equals, "=="),
+        Token(TokenType::Not, "!"),
+        Token(TokenType::True, "true"),
+        Token(TokenType::And, "&&"),
+        Token(TokenType::IntegerLiteral, "7"),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::IntegerLiteral, "8"),
+        Token(TokenType::Or, "||"),
+        Token(TokenType::IntegerLiteral, "9"),
+        Token(TokenType::GreaterThan, ">"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::And, "&&"),
+        Token(TokenType::IntegerLiteral, "11"),
+        Token(TokenType::LessThanOrEqualTo, "<="),
+        Token(TokenType::IntegerLiteral, "12"),
+        Token(TokenType::Pipe, "|"),
+        Token(TokenType::IntegerLiteral, "13"),
+        Token(TokenType::Ampersand, "&"),
+        Token(TokenType::IntegerLiteral, "14"),
+        Token(TokenType::Caret, "^"),
+        Token(TokenType::IntegerLiteral, "15")};
+
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
-TEST(LexerTests, TokeniseLambdaFunction) {
-    const char* input = "let add = (x, y) => x + y";
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 10);
-
-    int i = 0;
-
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);             // "let"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);         // "add"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);             // "="
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN);         // "("
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);         // "x"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);              // ","
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);         // "y"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE);        // ")"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOUBLE_RIGHT_ARROW); // "=>"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);         // "x"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS);               // "+"
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);         // "y"
-
-    free_tokens(tokens);
-}
 
-TEST(LexerTests, TokeniseEmptyInput) {
-    const char* input = "";
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 0);
-
-    free_tokens(tokens);
-}
 
-TEST(LexerTests, TokeniseInvalidInput) {
-    const char* input = "$/?#¬`";
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-    ASSERT_EQ(tokens->count, 1);
-
-    EXPECT_EQ(tokens->tokens[0].type, TOKEN_INVALID);
-
-    free_tokens(tokens);
-}
 
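+// Type names such as "int" are not keywords in the new lexer: the
+// declaration tests below expect them to lex as plain identifiers.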
+// let <identifier> <type>
+TEST(LexerTests, VariableDeclarationWithoutExpr) {
+    const std::string input = "let variable int";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Identifier, "int")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// let <identifier> <type> = <expr>
+TEST(LexerTests, VariableDeclarationWithExpr) {
+    const std::string input = "let variable int = 1 + 2";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Identifier, "int"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// let <identifier> = <expr>
+TEST(LexerTests, VariableDeclarationWithoutType) {
+    const std::string input = "let variable = 1 + 2";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// let mut <identifier> <type> = <expr>
+TEST(LexerTests, MutableVariableDeclarationWithExpr) {
+    const std::string input = "let mut variable int = 1 + 2";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Mut, "mut"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Identifier, "int"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// let mut <identifier> <type>
+TEST(LexerTests, MutableVariableDeclarationWithoutExpr) {
+    const std::string input = "let mut variable int";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Mut, "mut"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Identifier, "int")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
-// This tests everything supported by the lexer, including keywords, operators, and literals.
-// The input is a large snippet of Dragon code that uses all the features of the language.
-// This is useful to quickly check if the lexer is working as expected, more specific cases are useful for debugging.
-TEST(LexerTests, TokeniseEverything) {
-    const char* input = R"(
-        let x int = 42
-        let mut y bool = true
-        let z float = 3.14
-
-        if 20 > x > 10 {
-            return
-        } else {
-            for i in 1..10 {
-                if i == 5 && !!y {
-                    break
-                }
-                continue
-            }
-        }
-
-        while y {
-            y = false || false
-        }
-
-        func add(a int, b int) -> int {
-            return a + b
-        }
-
-        struct Point {
-            x float,
-            y float
-        }
-
-        let mut p Point = Point{1, 2}
-        p.x = p.y * 10 / 100 - 5
-
-        enum Color {
-            Red,
-            Green,
-            Blue,
-        }
-
-        Color.Green
-
-        import math
-        return;
-    )";
-
-    TokenList* tokens = tokenise(input);
-
-    ASSERT_NE(tokens, nullptr);
-
-    ASSERT_EQ(tokens->count, 131);
-
-    int i = 0;
-
-    // let x int = 42
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-
-    // let mut y bool = true
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BOOL_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_TRUE_KW);
-
-    // let z float = 3.14
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);
-
-    // if 20 > x > 10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-
-    // { return }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // else { ... }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-
-    // for i in 1..10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FOR_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RANGE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-
-    // Continue for other statements similarly...
-
-    // while y { y = false || false }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_WHILE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // func add(a int, b int) -> int { return a + b }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FUNC_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // add
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RIGHT_ARROW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW);     // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // struct Point { x float, y float }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_STRUCT_KW);  // struct
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);      // float
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT);      // float
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // let mut p Point = Point{1, 2}
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 1
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 2
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // p.x = p.y * 10 / 100 - 5
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ASTERISK);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_SLASH);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 100
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MINUS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);    // 5
-
-    // enum Color { Red, Green, Blue }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ENUM_KW);    // enum
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Red
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Blue
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // Color.Green
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green
-
-    // import math
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IMPORT_KW);  // import
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // math
-
-    // return;
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);  // return
-
-    free_tokens(tokens);
-}
 
+// let mut <identifier> = <expr>
+TEST(LexerTests, MutableVariableDeclarationWithoutType) {
+    const std::string input = "let mut variable = 1 + 2";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Let, "let"),
+        Token(TokenType::Mut, "mut"),
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// <identifier> = <expr>
+TEST(LexerTests, Assignment) {
+    const std::string input = "variable = 1 + 2";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Identifier, "variable"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "2")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
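+// Argument lists are plain comma-separated expressions; the lexer only
+// emits Comma tokens and leaves grouping to the parser.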
+// <expr>, <expr>, <expr>
+// Example: x, y, 1 + 3
+TEST(LexerTests, Arguments) {
+    const std::string input = "x, y, 1 + 3";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Comma, ","),
+        Token(TokenType::Identifier, "y"),
+        Token(TokenType::Comma, ","),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "3")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// <identifier>(<args>)
+TEST(LexerTests, FunctionCall) {
+    const std::string input = "function(x, y, 1 + 3)";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::Identifier, "function"),
+        Token(TokenType::LeftParen, "("),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Comma, ","),
+        Token(TokenType::Identifier, "y"),
+        Token(TokenType::Comma, ","),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "3"),
+        Token(TokenType::RightParen, ")")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
+// while <expr> { <statements> }
+TEST(LexerTests, WhileLoop) {
+    const std::string input = "while x < 10 { x = x + 1 }";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::While, "while"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " ( received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " ( received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
 
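+// "0..10" lexes as IntegerLiteral, Range, IntegerLiteral: lex_number()
+// stops at the first '.', and lex_symbol() folds ".." into Range.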
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-
-    // { return }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // else { ... }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-
-    // for i in 1..10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FOR_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RANGE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER);
-
-    // Continue for other statements similarly...
-
-    // while y { y = false || false }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_WHILE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // func add(a int, b int) -> int { return a + b }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FUNC_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // add
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RIGHT_ARROW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // struct Point { x float, y float }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_STRUCT_KW); // struct
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); // float
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); // float
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // let mut p Point = Point{1, 2}
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 1
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 2
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // p.x = p.y * 10 / 100 - 5
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ASTERISK);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 10
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_SLASH);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 100
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MINUS);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 5
-
-    // enum Color { Red, Green, Blue }
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ENUM_KW); // enum
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Red
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Blue
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE);
-
-    // Color.Green
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT);
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green
-
-    // import math
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IMPORT_KW); // import
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // math
-
-    // return;
-    EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); // return
-
-    free_tokens(tokens);
+// for <identifier> in <expression> { <statements> }
+TEST(LexerTests, ForLoopWithExpr) {
+    const std::string input = "for i in 0..(10 + 1) { i = i + 1 }";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::For, "for"),
+        Token(TokenType::Identifier, "i"),
+        Token(TokenType::In, "in"),
+        Token(TokenType::IntegerLiteral, "0"),
+        Token(TokenType::Range, ".."),
+        Token(TokenType::LeftParen, "("),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightParen, ")"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "i"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "i"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " (received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " (received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
 }
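The two for-loop tests hinge on "0..10" lexing as IntegerLiteral, Range, IntegerLiteral rather than as a malformed float. src/lexer.cpp is not part of this diff, so the following is only a sketch of that idea, built on the private peek/peek_next/advance helpers declared in dragon/lexer.h (everything else here is assumed):

```cpp
#include <cctype>  // std::isdigit

// Sketch only -- not the code from src/lexer.cpp.
Token Lexer::lex_number() {
    std::string value;
    // Consume a bare digit run; the first '.' of "0..10" is left unconsumed
    // so the symbol lexer can pair it into a Range token.
    while (peek().has_value() && std::isdigit(static_cast<unsigned char>(*peek()))) {
        value += *advance();
    }
    return Token(TokenType::IntegerLiteral, value);
}

// One character of lookahead distinguishes a lone '.' (Dot) from ".." (Range).
Token Lexer::lex_symbol() {
    if (peek() == '.') {
        advance();
        if (peek() == '.') {
            advance();
            return Token(TokenType::Range, "..");
        }
        return Token(TokenType::Dot, ".");
    }
    // ... handling for the remaining symbols elided; assumes the caller
    // only invokes lex_symbol() when a character is available ...
    return Token(TokenType::Unknown, std::string(1, *advance()));
}
```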
+// if <expression> { <statements> }
+TEST(LexerTests, IfStatement) {
+    const std::string input = "if x < 10 { x = x + 1 }";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::If, "if"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " (received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " (received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
+// else if <expression> { <statements> }
+TEST(LexerTests, ElseIfStatement) {
+    const std::string input = "if x < 10 { x = x + 1 } else if x > 10 { x = x - 1 }";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::If, "if"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}"),
+        Token(TokenType::Else, "else"),
+        Token(TokenType::If, "if"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::GreaterThan, ">"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Minus, "-"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " (received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " (received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
+// else { <statements> }
+TEST(LexerTests, ElseStatement) {
+    const std::string input = "if x < 10 { x = x + 1 } else { x = x - 1 }";
+    const std::vector<Token> validTokens = {
+        Token(TokenType::If, "if"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::LessThan, "<"),
+        Token(TokenType::IntegerLiteral, "10"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Plus, "+"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}"),
+        Token(TokenType::Else, "else"),
+        Token(TokenType::LeftBrace, "{"),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Assign, "="),
+        Token(TokenType::Identifier, "x"),
+        Token(TokenType::Minus, "-"),
+        Token(TokenType::IntegerLiteral, "1"),
+        Token(TokenType::RightBrace, "}")};
+    Lexer lexer;
+
+    std::vector<Token> tokens = lexer.lex(input);
+
+    ASSERT_EQ(validTokens.size(), tokens.size())
+        << "Failed on input: " << input
+        << " (received a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")";
+
+    ASSERT_EQ(validTokens, tokens)
+        << "Failed on input: " << input
+        << " (received: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")";
+}
\ No newline at end of file
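The assertions above compare whole std::vector<Token> values with ASSERT_EQ and stream them through token_vector_to_string, so they presuppose value equality on Token and a stringify helper, neither of which appears in this diff. A minimal sketch of both, assuming Token exposes public type and value members (TokenType and std::string) alongside the token_type_to_string function:

```cpp
#include <string>
#include <vector>

// Sketch of the support code the ASSERT_EQ calls presuppose; the real
// definitions are assumed to live next to Token in dragon/token.h.
inline bool operator==(const Token& lhs, const Token& rhs) {
    // Value equality over the two fields the tests actually construct.
    return lhs.type == rhs.type && lhs.value == rhs.value;
}

inline std::string token_vector_to_string(const std::vector<Token>& tokens) {
    // Renders a token list as e.g.: Let("let") Identifier("x") Assign("=")
    std::string out;
    for (const Token& token : tokens) {
        out += token_type_to_string(token.type) + "(" + token.value + ") ";
    }
    return out;
}
```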
diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp
index e69de29..74a2bdc 100644
--- a/tests/parser_test.cpp
+++ b/tests/parser_test.cpp
@@ -0,0 +1,12 @@
+#include <gtest/gtest.h>
+
+#include "dragon/lexer.h"
+#include "dragon/token.h"
+#include "dragon/parser.h"
+
+// Variable/Constant Declaration Node
+// Variable/Constant Assignment Node
+// While Loop Node
+// For Loop Node
+// If-Else Node
+// Function Call Node
\ No newline at end of file
diff --git a/tests/semantics_test.cpp b/tests/semantics_test.cpp
new file mode 100644
index 0000000..e958733
--- /dev/null
+++ b/tests/semantics_test.cpp
@@ -0,0 +1,13 @@
+#include <gtest/gtest.h>
+
+#include "dragon/lexer.h"
+#include "dragon/token.h"
+#include "dragon/parser.h"
+#include "dragon/semantics.h"
+
+// Duplicate variable/constant identifier
+// Duplicate function call identifier
+// Non-boolean expression for while loop
+// Non-boolean expression for if statement
+// For loop expression isn't iterable
+// Function call argument type doesn't match parameter type
\ No newline at end of file
diff --git a/tests/vm_test.cpp b/tests/vm_test.cpp
deleted file mode 100644
index e69de29..0000000
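tests/parser_test.cpp and tests/semantics_test.cpp are comment-only stubs for now, so nothing in this change pins down the parser's API. Purely as an illustration of where the first planned case ("Variable/Constant Declaration Node") could lead, a test might take this shape; Parser, parse(), VariableDeclarationNode, and its fields are invented names, not taken from dragon/parser.h:

```cpp
#include <gtest/gtest.h>

#include "dragon/lexer.h"
#include "dragon/parser.h"

// Hypothetical shape only: every parser-side name here is an assumption
// about an API this diff has not defined yet.
TEST(ParserTests, VariableDeclaration) {
    Lexer lexer;
    std::vector<Token> tokens = lexer.lex("let x = 1");

    Parser parser(tokens);
    auto program = parser.parse();  // assumed to yield a list of AST nodes

    ASSERT_EQ(program.size(), 1u);
    auto* decl = dynamic_cast<VariableDeclarationNode*>(program[0].get());
    ASSERT_NE(decl, nullptr);
    EXPECT_EQ(decl->identifier, "x");
    EXPECT_FALSE(decl->is_mutable);
}
```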