From 6619e3b4164d1560aa734dfe35e5e9862b9da1ac Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:08:14 +0000 Subject: [PATCH 01/71] Deleted vm.h/.cpp --- include/dragon/vm.h | 0 src/vm.cpp | 1 - 2 files changed, 1 deletion(-) delete mode 100644 include/dragon/vm.h delete mode 100644 src/vm.cpp diff --git a/include/dragon/vm.h b/include/dragon/vm.h deleted file mode 100644 index e69de29..0000000 diff --git a/src/vm.cpp b/src/vm.cpp deleted file mode 100644 index cdce42c..0000000 --- a/src/vm.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "dragon/vm.h" \ No newline at end of file From 821a3dfc3dc2721d4d9caf60d0851b3e22e8940d Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:08:33 +0000 Subject: [PATCH 02/71] Removed test cases and added skeleton comments for lexing --- tests/lexer_test.cpp | 527 ++----------------------------------------- 1 file changed, 14 insertions(+), 513 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index b4e541e..0d2d72d 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -4,518 +4,19 @@ #include "dragon/lexer.h" #include "dragon/token.h" - -TEST(LexerTests, TokeniseVariableDeclarations) { - const char* input = R"( - let variable324534 int = 42 - let mut t_t_3434_t_golden bool = true - let bacon_wrapped_4_shrimp float = 3.14 - )"; - - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 15); - - int i = 0; - - // let x int = 42 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 42 - - // let mut y bool = true - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW); // mut - 
EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BOOL_KW); // bool - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_TRUE_KW); // true - - // let z float = 3.14 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // z - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); // 3.14 - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseVariableAssignment) { - const char* input = "name = \"Alice and the frog\""; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 5); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); // "=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "42" - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseRelationalOperators) { - const char* input = "20 > x > 10 < 1000 <= 34_540 >= 0"; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 6); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "20" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // ">" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // ">" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "10" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LSS); // "<" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "1000" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GTE); // ">=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "34540" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LTE); // "<=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "0" - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseBooleanOperators) { - const char* input 
= "false || true && !!x"; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 7); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW); // "false" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR); // "||" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_TRUE_KW); // "true" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_AND); // "&&" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT); // "!" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT); // "!" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseElseIf) { - const char* input = R"( - if x > 10 { - return - } else if x < 5 { - break - } - )"; - - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 16); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW); // "if" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // ">" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "10" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); // "return" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW); // "else" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW); // "if" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LTE); // "<" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // "5" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BREAK_KW); // "break" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseConditionalForLoop) { - const char* input = R"( - if 20 > x > 10 { - return - } else { - for i in 1..10 { - if i == 5 && !!y { - break - } - continue - } - } - )"; - - TokenList* tokens 
= tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 36); - - int i = 0; - - // if 20 > x > 10 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 20 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // > - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); // > - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 10 - - // { return } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // else { for i in 1..10 { ... } } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FOR_KW); // for - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // i - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IN_KW); // in - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 1 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RANGE); // .. - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 10 - - // { if i == 5 && !!y { break } continue } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW); // if - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // i - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALITY); // == - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 5 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_AND); // && - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT); // ! - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_NOT); // ! 
- EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BREAK_KW); // break - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_CONTINUE_KW); // continue - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseWhileLoop) { - const char* input = R"( - while y { - y = false || false - } - )"; - - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 10); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_WHILE_KW); // while - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); // = - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW); // false - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR); // || - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW); // false - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseFunctionDefinition) { - const char* input = R"( - func add(a int, b int) -> int { - return a + b - } - )"; - - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 17); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FUNC_KW); // func - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // add - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int 
- EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RIGHT_ARROW); // -> - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); // return - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS); // + - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseLambdaFunction) { - const char* input = "let add = (x, y) => x + y"; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 10); - - int i = 0; - - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); // "let" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "add" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); // "=" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN); // "(" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); // "," - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "y" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE); // ")" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOUBLE_RIGHT_ARROW); // "=>" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "x" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS); // "+" - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // "y" - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseEmptyInput) { - const char* input = ""; - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 0); - - free_tokens(tokens); -} - -TEST(LexerTests, TokeniseInvalidInput) { - const char* input = "$/?#¬`"; - TokenList* tokens = tokenise(input); - - - ASSERT_NE(tokens, nullptr); - ASSERT_EQ(tokens->count, 1); - - EXPECT_EQ(tokens->tokens[0].type, 
TOKEN_INVALID); - - free_tokens(tokens); -} - -// This tests everything supported by the lexer, including keywords, operators, and literals. -// The input is a large snippet of Dragon code that uses all the features of the language. -// This is useful to quickly check if the lexer is working as expected, more specific cases are useful for debugging. -TEST(LexerTests, TokeniseEverything) { - const char* input = R"( - let x int = 42 - let mut y bool = true - let z float = 3.14 - - if 20 > x > 10 { - return - } else { - for i in 1..10 { - if i == 5 && !!y { - break - } - continue - } - } - - while y { - y = false || false - } - - func add(a int, b int) -> int { - return a + b - } - - struct Point { - x float, - y float - } - - let mut p Point = Point{1, 2} - p.x = p.y * 10 / 100 - 5 - - enum Color { - Red, - Green, - Blue, - } - - Color.Green - - import math - return; - )"; - - TokenList* tokens = tokenise(input); - - ASSERT_NE(tokens, nullptr); - - ASSERT_EQ(tokens->count, 131); - - int i = 0; - - // let x int = 42 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); - - // let mut y bool = true - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BOOL_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_TRUE_KW); - - // let z float = 3.14 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); - - // if 20 > x > 10 - 
EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IF_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_GRT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); - - // { return } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // else { ... } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ELSE_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - - // for i in 1..10 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FOR_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IN_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RANGE); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); - - // Continue for other statements similarly... 
- - // while y { y = false || false } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_WHILE_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_OR); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FALSE_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // func add(a int, b int) -> int { return a + b } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FUNC_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // add - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PAREN_CLOSE); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RIGHT_ARROW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INT_KW); // int - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // a - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_PLUS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // b - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // struct Point { x float, y float } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_STRUCT_KW); // struct - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); // float - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - 
EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_FLOAT); // float - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // let mut p Point = Point{1, 2} - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_LET_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MUT_KW); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Point - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 1 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 2 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // p.x = p.y * 10 / 100 - 5 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // x - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_EQUALS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // p - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // y - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ASTERISK); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 10 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_SLASH); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 100 - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_MINUS); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_INTEGER); // 5 - - // enum Color { Red, Green, Blue } - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_ENUM_KW); // enum - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_OPEN); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Red - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - 
EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_COMMA); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Blue - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_BRACE_CLOSE); - - // Color.Green - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Color - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_DOT); - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // Green - - // import math - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IMPORT_KW); // import - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_IDENTIFIER); // math - - // return; - EXPECT_EQ(tokens->tokens[i++].type, TOKEN_RETURN_KW); // return - - free_tokens(tokens); -} - +// let +// let = +// let = +// const = +// const +// const = +// = +// () +// while { } +// for in { } +// for in { } +// if { } +// else { } +// else if { } From b7988c6addfc903b7d4c7ea70217f69a15721a83 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:08:40 +0000 Subject: [PATCH 03/71] Removed test cases and added skeleton comments for parsing --- tests/parser_test.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp index e69de29..74a2bdc 100644 --- a/tests/parser_test.cpp +++ b/tests/parser_test.cpp @@ -0,0 +1,12 @@ +#include + +#include "dragon/lexer.h" +#include "dragon/token.h" +#include "dragon/parser.h" + +// Variable/Constant Declaration Node +// Variable/Constant Assignment Node +// While Loop Node +// For Loop Node +// If-Else Node +// Function Call Node \ No newline at end of file From db14bf6770ebf5fea4fb38a6aacff6f3d0143774 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:08:51 +0000 Subject: [PATCH 04/71] Removed test cases and added skeleton comments for semantics --- tests/semantics_test.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 
tests/semantics_test.cpp diff --git a/tests/semantics_test.cpp b/tests/semantics_test.cpp new file mode 100644 index 0000000..e958733 --- /dev/null +++ b/tests/semantics_test.cpp @@ -0,0 +1,13 @@ +#include + +#include "dragon/lexer.h" +#include "dragon/token.h" +#include "dragon/parser.h" +#include "dragon/semantics.h" + +// Duplicate variable/constant identifier +// Duplicate function call identifier +// Non-boolean expression for while loop +// Non-boolean expression for if statement +// For loop expression isn't iterable +// Function call argument type doesn't match parameter type \ No newline at end of file From 73a9856269aa2c4cc5958acdd0cbfeef20db250a Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:09:02 +0000 Subject: [PATCH 05/71] Deleted vm_test.cpp --- tests/vm_test.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/vm_test.cpp diff --git a/tests/vm_test.cpp b/tests/vm_test.cpp deleted file mode 100644 index e69de29..0000000 From 85efb5eba213cda03811f4b391c96a5cf5e93cc9 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:11:39 +0000 Subject: [PATCH 06/71] Removed tilde from operators --- examples/Common Programming Concepts/Operators.drg | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/Common Programming Concepts/Operators.drg b/examples/Common Programming Concepts/Operators.drg index 2301d61..916099c 100644 --- a/examples/Common Programming Concepts/Operators.drg +++ b/examples/Common Programming Concepts/Operators.drg @@ -8,6 +8,5 @@ & | ^ > >= <= <= >> << -~ ? 
: */ \ No newline at end of file From 1711b972ef79919b1e3ba35d3060c4cf83acc91e Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:00:00 +0000 Subject: [PATCH 07/71] Added lexer test for identifiers --- tests/lexer_test.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 0d2d72d..e72b494 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -4,6 +4,38 @@ #include "dragon/lexer.h" #include "dragon/token.h" +// +TEST(LexerTests, Identifier) { + const std::array input = { + "x", + "x_y123", + "reallyLongVariableNameWithNoNumbersOrUnderscores", + "U_ND_ER_SCO_RES", + "____starting___with__underscore", + "2thisShouldError_", // Identifiers cannot start with a number + "this should also error", // Variables cannot contain spaces + "Error?", // Identifiers cannot contain question marks + "#*&$£!!!", // Identifiers cannot contain any of these symbols + }; + const std::vector validTokens = { + Token(TokenType::Identifier, "x"), + Token(TokenType::Identifier, "x_y123"), + Token(TokenType::Identifier, "reallyLongVariableNameWithNoNumbersOrUnderscores"), + Token(TokenType::Identifier, "U_ND_ER_SCO_RES"), + Token(TokenType::Identifier, "____starting___with__underscore"), + Token(TokenType::Unknown, "2thisShouldError_"), + Token(TokenType::Unknown, "this should also error"), + Token(TokenType::Unknown, "Error?"), + Token(TokenType::Unknown, "#*&$£!!!") + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_identifier(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + // let // let = // let = From b75dd9f9ca72c2a3472759db76fcf193f74d64af Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:16:46 +0000 Subject: [PATCH 08/71] Added lexer test for let --- tests/lexer_test.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 
deletion(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index e72b494..fd1ec5a 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -1,5 +1,5 @@ #include - +#include #include "dragon/lexer.h" #include "dragon/token.h" @@ -37,6 +37,20 @@ TEST(LexerTests, Identifier) { } // let +TEST(LexerTests, VariableDeclarationWithoutExpr) { + const std::string input = "let variable int"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Identifier, "int") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // let = // let = // const = From 7b4d154440149482c7b8c72a51a2015ee0634efb Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:18:05 +0000 Subject: [PATCH 09/71] Added more lexer test skeletons --- tests/lexer_test.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index fd1ec5a..baf5403 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -36,6 +36,21 @@ TEST(LexerTests, Identifier) { } } +// +// Arithmetic + +// +// Boolean + +// +// Relational + +// +// bitwise + +// +// Mixed + // let TEST(LexerTests, VariableDeclarationWithoutExpr) { const std::string input = "let variable int"; @@ -57,12 +72,11 @@ TEST(LexerTests, VariableDeclarationWithoutExpr) { // const // const = // = -// () +// +// () // while { } // for in { } // for in { } // if { } -// else { } // else if { } - - +// else { } From 8a799c99f34ee0a57ea19c70ef07e4d8cf02b1aa Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:24:56 +0000 Subject: [PATCH 10/71] Updated CMakeLists Removed vm.cpp. Added main.cpp. 
--- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ed8040..84c0cb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,9 +12,9 @@ set(SOURCES src/lexer.cpp src/token.cpp src/parser.cpp - src/vm.cpp src/codegen.cpp src/semantics.cpp + src/main.cpp ) add_executable(dragon src/main.cpp ${SOURCES}) From 04c8f494946647e3cae1a6c3fb5cdd10fa4806b9 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 07:25:42 +0000 Subject: [PATCH 11/71] Removed vm_test.cpp, added semantics_test.cpp --- tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 82983d9..ac473c9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(run_tests lexer_test.cpp parser_test.cpp vm_test.cpp) +add_executable(run_tests lexer_test.cpp parser_test.cpp semantics_test.cpp) target_link_libraries(run_tests gtest gtest_main pthread) target_include_directories(run_tests PRIVATE ../include) @@ -7,7 +7,7 @@ target_sources(run_tests PRIVATE ../src/lexer.cpp ../src/token.cpp ../src/parser.cpp - ../src/vm.cpp + ../src/semantics.cpp ) include(GoogleTest) From 7d0d9f2c7a5f96704db8674c50f4e511629ef83e Mon Sep 17 00:00:00 2001 From: Remy <107559570+hrszpuk@users.noreply.github.com> Date: Fri, 3 Jan 2025 08:48:13 +0000 Subject: [PATCH 12/71] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d64281f..c6f464d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Dragon +# Dragonfly Compiler A high-level multi-paradigm programming language. 
``` From aca8c8a58b7bfbfd91f47cdb0b0294ca75867971 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 12:00:00 +0000 Subject: [PATCH 13/71] Added integer literals tests for lexer --- tests/lexer_test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index baf5403..04f49e1 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -35,6 +35,28 @@ TEST(LexerTests, Identifier) { ASSERT_TRUE(validTokens[i] == t); } } +// +TEST(LexerTests, Integer) { + const std::array input = { + "1", + "123", + "0", + "1_000_000", // Underscores are allowed, but are ignored + }; + const std::array validTokens = { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::IntegerLiteral, "123"), + Token(TokenType::IntegerLiteral, "0"), + Token(TokenType::IntegerLiteral, "1_000_000") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_number(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} // // Arithmetic From 0c6e63796a70b693809eeaedec327905ef1718c8 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 12:00:00 +0000 Subject: [PATCH 14/71] Added keyword tests for lexer --- tests/lexer_test.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 04f49e1..7f202bc 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -35,6 +35,38 @@ TEST(LexerTests, Identifier) { ASSERT_TRUE(validTokens[i] == t); } } + +// +TEST(LexerTests, Keywords) { + const std::array input = { + "let", + "mut", + "if", + "else", + "for", + "in", + "true", + "false", + "while", + }; + const std::vector validTokens = { + Token(TokenType::Let, "let"), + Token(TokenType::Mut, "mut"), + Token(TokenType::If, "if"), + Token(TokenType::Else, "else"), + Token(TokenType::For, "for"), + Token(TokenType::In, 
"in"), + Token(TokenType::True, "true"), + Token(TokenType::False, "false"), + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_identifier(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + // TEST(LexerTests, Integer) { const std::array input = { From eb5ec2348f678e3fa07bed515505766139f77bbe Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 12:00:00 +0000 Subject: [PATCH 15/71] Added string literal tests for the lexer --- tests/lexer_test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 7f202bc..1d0cb2b 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -67,6 +67,28 @@ TEST(LexerTests, Keywords) { } } +// +TEST(LexerTests, StringLiterals) { + const std::array input = { + "\"Enter username: \"", + "\"This is a string with a escape characters \\\" \\n \\t \"", + "\"Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|\"" + + }; + const std::array validTokens = { + Token(TokenType::StringLiteral, "Enter username: "), + Token(TokenType::StringLiteral, "This is a string with a escape characters \" \n \t "), + Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_string_literal(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + // TEST(LexerTests, Integer) { const std::array input = { From eda35eb561e6692153c314cf9e6986eb2bdfcd64 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 13:00:00 +0000 Subject: [PATCH 16/71] Added operators test cases for lexing --- tests/lexer_test.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 1d0cb2b..88e64fe 100644 --- a/tests/lexer_test.cpp +++ 
b/tests/lexer_test.cpp @@ -112,6 +112,27 @@ TEST(LexerTests, Integer) { } } +// Test for operators +TEST(LexerTests, Operators) { + const std::array input = { + "+", "-", "*", "/", "=" + }; + const std::array validTokens = { + Token(TokenType::Plus, "+"), + Token(TokenType::Minus, "-"), + Token(TokenType::Asterisk, "*"), + Token(TokenType::Slash, "/"), + Token(TokenType::Equal, "=") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_operator(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + // // Arithmetic From 19474714e0ef41a5732a37e7e26d52bd4cdc052b Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 13:00:00 +0000 Subject: [PATCH 17/71] Added punctuation test case for lexing --- tests/lexer_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 88e64fe..38fa7c9 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -133,6 +133,25 @@ TEST(LexerTests, Operators) { } } +// Test for punctuation +TEST(LexerTests, Punctuation) { + const std::array input = { + "{", "}", ";" + }; + const std::array validTokens = { + Token(TokenType::LeftBrace, "{"), + Token(TokenType::RightBrace, "}"), + Token(TokenType::Semicolon, ";") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_punctuation(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + // // Arithmetic From 951ac44b3d7843cf03bb0e8213eba6347bfd88b1 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 13:00:00 +0000 Subject: [PATCH 18/71] Added single-line comments test case --- tests/lexer_test.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 38fa7c9..f057017 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -152,6 +152,24 @@ TEST(LexerTests, Punctuation) { 
} } +// Test for single-line comments +TEST(LexerTests, SingleLineComments) { + const std::array input = { + "// This is a comment" + }; + const std::array validTokens = { + Token(TokenType::Comment, "// This is a comment") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_comment(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} + + // // Arithmetic From 324fb1a2c5c9d8588e1bed937ffcb2e9a6b8543d Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sat, 4 Jan 2025 13:00:00 +0000 Subject: [PATCH 19/71] Added multi-line comment test case for lexing --- tests/lexer_test.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index f057017..82de1f4 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -169,6 +169,22 @@ TEST(LexerTests, SingleLineComments) { } } +// Test for multi-line comments +TEST(LexerTests, MultiLineComments) { + const std::array input = { + "/* This is a multi-line comment */" + }; + const std::array validTokens = { + Token(TokenType::Comment, "/* This is a multi-line comment */") + }; + + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + Token t = lexer.lex_comment(input[i]); + ASSERT_TRUE(validTokens[i] == t); + } +} // // Arithmetic From 2760f5971d3c54dd4c4cb8641e940937a1baa5d7 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:18:26 +0000 Subject: [PATCH 20/71] Added arithmetic expression test case for lexer --- tests/lexer_test.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 82de1f4..2ad3ed2 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -188,6 +188,42 @@ TEST(LexerTests, MultiLineComments) { // // Arithmetic +TEST(LexerTests, Arithmetic) { + const std::array input = { + "1 + 2", + "1 - 2", + "1 * 2", + "1 / 2" + 
}; + const std::array, 4> validTokens = { + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Minus, "-"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Star, "*"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Slash, "/"), + Token(TokenType::IntegerLiteral, "2") + } + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); + ASSERT_TRUE(validTokens[i] == tokens); + } +} // // Boolean From fb2d900ce090e7340c086bf9fc024e8431608b3e Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:26:16 +0000 Subject: [PATCH 21/71] Added boolean expression test case for lexing --- tests/lexer_test.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 2ad3ed2..0580aa0 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -227,6 +227,42 @@ TEST(LexerTests, Arithmetic) { // // Boolean +TEST(LexerTests, Boolean) { + const std::array input = { + "true && false", + "true || false", + "!true", + "true == false" + "true != false" + }; + const std::array, 4> validTokens = { + { + Token(TokenType::True, "true"), + Token(TokenType::And, "&&"), + Token(TokenType::False, "false") + }, + { + Token(TokenType::True, "true"), + Token(TokenType::Or, "||"), + Token(TokenType::False, "false") + }, + { + Token(TokenType::Bang, "!"), + Token(TokenType::True, "true") + }, + { + Token(TokenType::True, "true"), + Token(TokenType::EqualEqual, "=="), + Token(TokenType::False, "false") + } + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); + ASSERT_TRUE(validTokens[i] == tokens); + } +} // 
// Relational From 75805c37cbd4ab707f1d8e7821ba4fa3d744a99d Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:27:46 +0000 Subject: [PATCH 22/71] Added relational expression test cases for lexing --- tests/lexer_test.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 0580aa0..6ae2e99 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -266,6 +266,42 @@ TEST(LexerTests, Boolean) { // // Relational +TEST(LexerTests, Relational) { + const std::array input = { + "1 < 2", + "1 > 2", + "1 <= 2", + "1 >= 2" + }; + const std::array, 4> validTokens = { + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Greater, ">"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::LessEqual, "<="), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::GreaterEqual, ">="), + Token(TokenType::IntegerLiteral, "2") + } + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); + ASSERT_TRUE(validTokens[i] == tokens); + } +} // // bitwise From 0be43920b12502647044864f167d37a1f737f028 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:28:09 +0000 Subject: [PATCH 23/71] Added bitwise expression test cases for lexing --- tests/lexer_test.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 6ae2e99..dadc2b4 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -305,6 +305,41 @@ TEST(LexerTests, Relational) { // // bitwise +TEST(LexerTests, Bitwise) { + const std::array input = { + "1 & 2", + 
"1 | 2", + "1 ^ 2", + "~1" + }; + const std::array, 4> validTokens = { + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Ampersand, "&"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Pipe, "|"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Caret, "^"), + Token(TokenType::IntegerLiteral, "2") + }, + { + Token(TokenType::Tilde, "~"), + Token(TokenType::IntegerLiteral, "1") + } + }; + Lexer lexer; + + for (size_t i = 0; i < input.size(); i++) { + std::vector tokens = lexer.lex(input[i]); + ASSERT_TRUE(validTokens[i] == tokens); + } +} // // Mixed From d6433c15652661e930ac4d465270ea962d6a2c26 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:28:32 +0000 Subject: [PATCH 24/71] Added mixed operaotr expression test cases for lexing --- tests/lexer_test.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index dadc2b4..929d249 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -343,6 +343,47 @@ TEST(LexerTests, Bitwise) { // // Mixed +TEST(LexerTests, Mixed) { + const std::string input = "1 + 2 * 3 / 4 - 5 == !true && 7 < 8 || 9 > 10 && 11 <= 12 | 13 & 14 ^ 15"; + const std::vector validTokens = { + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2"), + Token(TokenType::Star, "*"), + Token(TokenType::IntegerLiteral, "3"), + Token(TokenType::Slash, "/"), + Token(TokenType::IntegerLiteral, "4"), + Token(TokenType::Minus, "-"), + Token(TokenType::IntegerLiteral, "5"), + Token(TokenType::EqualEqual, "=="), + Token(TokenType::Bang, "!"), + Token(TokenType::True, "true"), + Token(TokenType::And, "&&"), + Token(TokenType::IntegerLiteral, "7"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "8"), + 
Token(TokenType::Or, "||"), + Token(TokenType::IntegerLiteral, "9"), + Token(TokenType::Greater, ">"), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::And, "&&"), + Token(TokenType::IntegerLiteral, "11"), + Token(TokenType::LessEqual, "<="), + Token(TokenType::IntegerLiteral, "12"), + Token(TokenType::Pipe, "|"), + Token(TokenType::IntegerLiteral, "13"), + Token(TokenType::Ampersand, "&"), + Token(TokenType::IntegerLiteral, "14"), + Token(TokenType::Caret, "^"), + Token(TokenType::IntegerLiteral, "15") + }; + + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} // let TEST(LexerTests, VariableDeclarationWithoutExpr) { From afdfd7087d4f37901c086adc09fdb022cea1636e Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:29:10 +0000 Subject: [PATCH 25/71] Added constant declaration with type and expression test case for lexing --- tests/lexer_test.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 929d249..af02548 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -401,6 +401,23 @@ TEST(LexerTests, VariableDeclarationWithoutExpr) { } // let = +TEST (LexerTests, VariableDeclarationWithExpr) { + const std::string input = "let variable int = 1 + 2"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Identifier, "int"), + Token(TokenType::Equal, "="), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} // let = // const = // const From c1dc574277dabc92682ac74d001f6954b3508053 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:31:12 +0000 Subject: [PATCH 26/71] Added 
constant declaration with expression test case for lexing --- tests/lexer_test.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index af02548..8084ded 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -418,10 +418,24 @@ TEST (LexerTests, VariableDeclarationWithExpr) { ASSERT_TRUE(validTokens == tokens); } + // let = -// const = -// const -// const = +TEST(LexerTests, VariableDeclarationWithoutType) { + const std::string input = "let variable = 1 + 2"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Equal, "="), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} // = // // () From 64cc955006da95b34a201d8d617d515fddd25346 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:31:57 +0000 Subject: [PATCH 27/71] Added variable declaration with type and expression test case for lexing --- tests/lexer_test.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 8084ded..c7903cb 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -436,6 +436,27 @@ TEST(LexerTests, VariableDeclarationWithoutType) { ASSERT_TRUE(validTokens == tokens); } + +// let mut = +TEST(LexerTests, MutableVariableDeclarationWithExpr) { + const std::string input = "let mut variable int = 1 + 2"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Identifier, "int"), + Token(TokenType::Equal, "="), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + 
Token(TokenType::IntegerLiteral, "2") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // = // // () From 2c47482aa96d2fde0ab0e6a4a37a623ed49585c9 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:32:17 +0000 Subject: [PATCH 28/71] Added variable declaration with type test case for lexing --- tests/lexer_test.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index c7903cb..1f7436b 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -457,6 +457,22 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { ASSERT_TRUE(validTokens == tokens); } +// let mut +TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { + const std::string input = "let mut variable int"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Identifier, "int") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // = // // () From f965ce96e3d816ad71d52eb94657cf671e5b85e2 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:32:50 +0000 Subject: [PATCH 29/71] Added variable declaration with expression test case for lexing --- tests/lexer_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 1f7436b..2db3cbc 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -473,6 +473,25 @@ TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { ASSERT_TRUE(validTokens == tokens); } +// let mut = +TEST(LexerTests, MutableVariableDeclarationWithoutType) { + const std::string input = "let mut variable = 1 + 2"; + const std::vector validTokens = { + Token(TokenType::LetKeyword, "let"), + 
Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Identifier, "variable"), + Token(TokenType::Equal, "="), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // = // // () From af5ad602e454b90895ec77f65c6250c9f7b95cea Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:33:22 +0000 Subject: [PATCH 30/71] Added variable assignment test case for lexing --- tests/lexer_test.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 2db3cbc..a2bed56 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -493,8 +493,22 @@ TEST(LexerTests, MutableVariableDeclarationWithoutType) { } // = -// -// () +TEST(LexerTests, Assignment) { + const std::string input = "variable = 1 + 2"; + const std::vector validTokens = { + Token(TokenType::Identifier, "variable"), + Token(TokenType::Equal, "="), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // while { } // for in { } // for in { } From d64ea3955f11ffe4d3200a16ca5a1e568b618e1b Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:41:36 +0000 Subject: [PATCH 31/71] Added arguments test case for lexing --- tests/lexer_test.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index a2bed56..4898661 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -509,6 +509,26 @@ TEST(LexerTests, Assignment) { ASSERT_TRUE(validTokens == tokens); } +// +// Example: x, y, 1 + 3 +TEST(LexerTests, Arguments) { + const std::string 
input = "x, y, 1 + 3"; + const std::vector validTokens = { + Token(TokenType::Identifier, "x"), + Token(TokenType::Comma, ","), + Token(TokenType::Identifier, "y"), + Token(TokenType::Comma, ","), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "3") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // while { } // for in { } // for in { } From ed3f3911ce903ad6437086ebd666ed8839ccc444 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:41:59 +0000 Subject: [PATCH 32/71] Added test cases for lexing function calls --- tests/lexer_test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 4898661..ada8ce3 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -529,6 +529,28 @@ TEST(LexerTests, Arguments) { ASSERT_TRUE(validTokens == tokens); } +// () +TEST(LexerTests, FunctionCall) { + const std::string input = "function(x, y, 1 + 3)"; + const std::vector validTokens = { + Token(TokenType::Identifier, "function"), + Token(TokenType::LeftParen, "("), + Token(TokenType::Identifier, "x"), + Token(TokenType::Comma, ","), + Token(TokenType::Identifier, "y"), + Token(TokenType::Comma, ","), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "3"), + Token(TokenType::RightParen, ")") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // while { } // for in { } // for in { } From 2a5dc70139d136384afc550b2f0a69265b946230 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:42:17 +0000 Subject: [PATCH 33/71] Added test cases for lexing while loops --- tests/lexer_test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git 
a/tests/lexer_test.cpp b/tests/lexer_test.cpp index ada8ce3..c3e7c19 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -552,6 +552,28 @@ TEST(LexerTests, FunctionCall) { } // while { } +TEST(LexerTests, WhileLoop) { + const std::string input = "while x < 10 { x = x + 1 }"; + const std::vector validTokens = { + Token(TokenType::While, "while"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // for in { } // for in { } // if { } From 3fe173600a669b33ee91f1b7cb388ba9b005624f Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:42:35 +0000 Subject: [PATCH 34/71] Added test cases for lexing for loops (identifiers) --- tests/lexer_test.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index c3e7c19..a369f74 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -575,6 +575,30 @@ TEST(LexerTests, WhileLoop) { } // for in { } +TEST(LexerTests, ForLoop) { + const std::string input = "for i in 0..10 { i = i + 1 }"; + const std::vector validTokens = { + Token(TokenType::For, "for"), + Token(TokenType::Identifier, "i"), + Token(TokenType::In, "in"), + Token(TokenType::IntegerLiteral, "0"), + Token(TokenType::Range, ".."), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "i"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "i"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; 
+ Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // for in { } // if { } // else if { } From 6d7c00a75a0ac2395f7df790530282b30ac34698 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:42:50 +0000 Subject: [PATCH 35/71] Added test cases for lexing for loops (expressions) --- tests/lexer_test.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index a369f74..a989627 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -600,6 +600,34 @@ TEST(LexerTests, ForLoop) { } // for in { } +TEST(LexerTests, ForLoopWithExpr) { + const std::string input = "for i in 0..(10 + 1) { i = i + 1 }"; + const std::vector validTokens = { + Token(TokenType::For, "for"), + Token(TokenType::Identifier, "i"), + Token(TokenType::In, "in"), + Token(TokenType::IntegerLiteral, "0"), + Token(TokenType::Range, ".."), + Token(TokenType::LeftParen, "("), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightParen, ")"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "i"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "i"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // if { } // else if { } // else { } From 3f39f937a68daa4869b8f268fc2cfd5549bb8070 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:43:02 +0000 Subject: [PATCH 36/71] Added test cases for lexing if statements --- tests/lexer_test.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index a989627..f40a41c 100644 --- 
a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -629,5 +629,27 @@ TEST(LexerTests, ForLoopWithExpr) { } // if { } +TEST(LexerTests, IfStatement) { + const std::string input = "if x < 10 { x = x + 1 }"; + const std::vector validTokens = { + Token(TokenType::If, "if"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // else if { } // else { } From 73fe1035a109968c03b12731fc4109f19030fdfb Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:43:26 +0000 Subject: [PATCH 37/71] Added test cases for lexing if else if statement --- tests/lexer_test.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index f40a41c..efccfd2 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -652,4 +652,38 @@ TEST(LexerTests, IfStatement) { } // else if { } +TEST(LexerTests, ElseIfStatement) { + const std::string input = "if x < 10 { x = x + 1 } else if x > 10 { x = x - 1 }"; + const std::vector validTokens = { + Token(TokenType::If, "if"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}"), + Token(TokenType::Else, "else"), + Token(TokenType::If, "if"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Greater, ">"), 
+ Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Minus, "-"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} + // else { } From b7706bdf03122ab1801f95af4463cfc33feb14d8 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:44:00 +0000 Subject: [PATCH 38/71] Added test cases for lexing if else statement --- tests/lexer_test.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index efccfd2..59a6706 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -687,3 +687,32 @@ TEST(LexerTests, ElseIfStatement) { } // else { } +TEST(LexerTests, ElseStatement) { + const std::string input = "if x < 10 { x = x + 1 } else { x = x - 1 }"; + const std::vector validTokens = { + Token(TokenType::If, "if"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Less, "<"), + Token(TokenType::IntegerLiteral, "10"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}"), + Token(TokenType::Else, "else"), + Token(TokenType::LeftBrace, "{"), + Token(TokenType::Identifier, "x"), + Token(TokenType::Equal, "="), + Token(TokenType::Identifier, "x"), + Token(TokenType::Minus, "-"), + Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::RightBrace, "}") + }; + Lexer lexer; + + std::vector tokens = lexer.lex(input); + + ASSERT_TRUE(validTokens == tokens); +} \ No newline at end of file From e67adcff7e78244eef19e9917b3dc87d77f85226 Mon Sep 17 00:00:00 2001 From: hrszpuk 
<107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 11:46:28 +0000 Subject: [PATCH 39/71] Added missing "while" keyword in valid tokens check for keywords (lexing) --- tests/lexer_test.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 59a6706..f135ef1 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -58,6 +58,7 @@ TEST(LexerTests, Keywords) { Token(TokenType::In, "in"), Token(TokenType::True, "true"), Token(TokenType::False, "false"), + Token(TokenType::While, "while"), }; Lexer lexer; @@ -385,11 +386,11 @@ TEST(LexerTests, Mixed) { ASSERT_TRUE(validTokens == tokens); } -// let +// let TEST(LexerTests, VariableDeclarationWithoutExpr) { const std::string input = "let variable int"; const std::vector validTokens = { - Token(TokenType::LetKeyword, "let"), + Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int") }; @@ -404,7 +405,7 @@ TEST(LexerTests, VariableDeclarationWithoutExpr) { TEST (LexerTests, VariableDeclarationWithExpr) { const std::string input = "let variable int = 1 + 2"; const std::vector validTokens = { - Token(TokenType::LetKeyword, "let"), + Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int"), Token(TokenType::Equal, "="), @@ -423,7 +424,7 @@ TEST (LexerTests, VariableDeclarationWithExpr) { TEST(LexerTests, VariableDeclarationWithoutType) { const std::string input = "let variable = 1 + 2"; const std::vector validTokens = { - Token(TokenType::LetKeyword, "let"), + Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), Token(TokenType::Equal, "="), Token(TokenType::IntegerLiteral, "1"), @@ -441,8 +442,8 @@ TEST(LexerTests, VariableDeclarationWithoutType) { TEST(LexerTests, MutableVariableDeclarationWithExpr) { const std::string input = "let mut variable int = 1 + 2"; const std::vector validTokens = { - 
Token(TokenType::LetKeyword, "let"), - Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Let, "let"), + Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int"), Token(TokenType::Equal, "="), @@ -461,8 +462,8 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { const std::string input = "let mut variable int"; const std::vector validTokens = { - Token(TokenType::LetKeyword, "let"), - Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Let, "let"), + Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int") }; @@ -477,8 +478,8 @@ TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { TEST(LexerTests, MutableVariableDeclarationWithoutType) { const std::string input = "let mut variable = 1 + 2"; const std::vector validTokens = { - Token(TokenType::LetKeyword, "let"), - Token(TokenType::MutKeyword, "mut"), + Token(TokenType::Let, "let"), + Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), Token(TokenType::Equal, "="), Token(TokenType::IntegerLiteral, "1"), From 664cf19523b45d4391a69304ad157613a877a831 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:25:27 +0000 Subject: [PATCH 40/71] Deleted token.cpp as all methods can be inlined --- src/token.cpp | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 src/token.cpp diff --git a/src/token.cpp b/src/token.cpp deleted file mode 100644 index b152832..0000000 --- a/src/token.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "dragon/token.h" -#include -#include - -TokenList* create_token_list() { - TokenList* list = (TokenList*)malloc(sizeof(TokenList)); - list->tokens = (Token*)malloc(sizeof(Token) * 8); - list->count = 0; - list->capacity = 8; - return list; -} - -void append_token(TokenList* list, Token token) { - if (list->count >= 
list->capacity) { - list->capacity *= 2; - list->tokens = (Token*)realloc(list->tokens, sizeof(Token) * list->capacity); - } - list->tokens[list->count++] = token; -} - -void free_tokens(TokenList* list) { - for (size_t i = 0; i < list->count; i++) { - free(list->tokens[i].value); - } - free(list->tokens); - free(list); -} From ba232cea9934f9450bd0949a53200807c87b0778 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:25:51 +0000 Subject: [PATCH 41/71] Switched to #pragma once --- include/dragon/token.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/dragon/token.h b/include/dragon/token.h index 6f624a9..2a6219a 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -1,9 +1,7 @@ -#ifndef TOKEN_H -#define TOKEN_H +#pragma once -#include - -typedef enum { +#include +#include // Keywords TOKEN_LET_KW, // "let" TOKEN_MUT_KW, // "mut" From 52633a6638660cd884fc63cfeff950584ad6b10a Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:26:16 +0000 Subject: [PATCH 42/71] Added token calss with various constructor --- include/dragon/token.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/include/dragon/token.h b/include/dragon/token.h index 2a6219a..69ede8a 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -2,6 +2,33 @@ #include #include +class Token { +public: + TokenType type; + std::string value; + size_t line; + size_t column; + + Token(TokenType type, std::string value, size_t line, size_t column) { + this->type = type; + this->value = value; + this->line = line; + this->column = column; + } + + Token(TokenType type, std::string value) { + this->type = type; + this->value = value; + this->line = 0; + this->column = 0; + } + + Token(TokenType type) { + this->type = type; + this->value = ""; + this->line = 0; + this->column = 0; + } // Keywords TOKEN_LET_KW, // "let" 
TOKEN_MUT_KW, // "mut" From 9929f098897ba1d37c0ab7916b9363e9d49e330f Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:26:47 +0000 Subject: [PATCH 43/71] Added comparison operators (==, !=) for tokens --- include/dragon/token.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/dragon/token.h b/include/dragon/token.h index 69ede8a..da47dd7 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -29,6 +29,14 @@ class Token { this->line = 0; this->column = 0; } + + inline bool operator==(const Token& other) const { + return this->type == other.type && this->value == other.value; + } + + inline bool operator!=(const Token& other) const { + return this->type != other.type || this->value != other.value; + } // Keywords TOKEN_LET_KW, // "let" TOKEN_MUT_KW, // "mut" From ef9e4543d0d6ddb168e3aea06f36d003998a6cc2 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:27:07 +0000 Subject: [PATCH 44/71] Added Token.to_string for display --- include/dragon/token.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/dragon/token.h b/include/dragon/token.h index da47dd7..6e371ca 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -37,6 +37,24 @@ class Token { inline bool operator!=(const Token& other) const { return this->type != other.type || this->value != other.value; } + + inline std::string to_string() { + if (this->line == 0 && this->column == 0 && this->value == "") { + return std::format("Token({})", this->type); + } + + if (this->line == 0 && this->column == 0) { + return std::format("Token({}, {})", this->type, this->value); + } + + if (this->value == "") { + return std::format("Token({}, {}, {}, {})", this->type, this->line, this->column); + } + + return std::format("Token({}, {}, {}, {})", this->type, this->value, this->line, this->column); + } +}; + // Keywords TOKEN_LET_KW, // "let" 
TOKEN_MUT_KW, // "mut" From e99a18986b3a7f212fd70e689a4138218a2bb58c Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:27:24 +0000 Subject: [PATCH 45/71] Added enum class TokenType --- include/dragon/token.h | 165 ++++++++++++----------------------------- 1 file changed, 47 insertions(+), 118 deletions(-) diff --git a/include/dragon/token.h b/include/dragon/token.h index 6e371ca..bd77ce9 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -2,6 +2,9 @@ #include #include + +enum class TokenType; + class Token { public: TokenType type; @@ -55,127 +58,53 @@ class Token { } }; +enum class TokenType { // Keywords - TOKEN_LET_KW, // "let" - TOKEN_MUT_KW, // "mut" - TOKEN_INT_KW, // "int" - TOKEN_IF_KW, // "if" - TOKEN_ELSE_KW, // "else" - TOKEN_FOR_KW, // "for" - TOKEN_IN_KW, // "in" - TOKEN_FUNC_KW, // "func" - TOKEN_RETURN_KW, // "return" - TOKEN_WHILE_KW, // "while" - TOKEN_TRUE_KW, // "true" - TOKEN_FALSE_KW, // "false" - TOKEN_BOOL_KW, // "bool" - TOKEN_BREAK_KW, // "break" - TOKEN_CONTINUE_KW, // "continue" - TOKEN_STRUCT_KW, // "struct" - TOKEN_ENUM_KW, // "enum" - TOKEN_TYPE_KW, // "type" - TOKEN_MATCH_KW, // "match" - TOKEN_IMPORT_KW, // "import" - TOKEN_AS_KW, // "as" + Let, + Mut, + If, + Else, + While, + For, + In, + True, + False, // Literals - TOKEN_INTEGER, // 123 - TOKEN_FLOAT, // 123.45 - TOKEN_IDENTIFIER, // variable_name - TOKEN_STRING, // "string" - TOKEN_CHAR, // 'c' + IntegerLiteral, + StringLiteral, + Identifier, // Symbols - TOKEN_EQUALS, // = - TOKEN_PLUS, // + - TOKEN_MINUS, // - - TOKEN_ASTERISK, // * - TOKEN_SLASH, // / - TOKEN_MODULO, // % - TOKEN_AND, // && - TOKEN_OR, // || - TOKEN_NOT, // ! 
- TOKEN_EQUALITY, // == - TOKEN_NOT_EQ, // != - TOKEN_GRT, // > - TOKEN_LSS, // < - TOKEN_LTE, // <= - TOKEN_GTE, // >= - TOKEN_LSHIFT, // << - TOKEN_RSHIFT, // >> - TOKEN_AMPERSAND, // & - TOKEN_PIPE, // | - TOKEN_CARET, // ^ - TOKEN_TILDE, // ~ - TOKEN_BRACE_OPEN, // { - TOKEN_BRACE_CLOSE, // } - TOKEN_PAREN_OPEN, // ( - TOKEN_PAREN_CLOSE, // ) - TOKEN_COMMA, // , - TOKEN_SEMICOLON, // ; - TOKEN_COLON, // : - TOKEN_DOT, // . - TOKEN_RANGE, // .. - TOKEN_DOUBLE_RIGHT_ARROW, // => - TOKEN_RIGHT_ARROW, // -> - - // Misc - TOKEN_COMMENT, // Comment - TOKEN_EOF, // End of file - TOKEN_INVALID // Invalid token -} TokenType; - -static const char* keywords[] = { - // Variable Declarations - "let", - "mut", - - // Data Types - "int", - "float", - "bool", - "char", - - // Control Flow - "if", - "else", - "for", - "in", - "while", - "break", - "continue", - - // Boolean Literals - "true", - "false", - - // Functions - "func", - "return", - - // Modules and Types - "import", - "struct", - "enum", - "type", - "match", - "as" + Plus, + Minus, + Star, + Slash, + And, + Or, + Not, + Equals, + NotEquals, + LessThan, + GreaterThan, + LessThanOrEqualTo, + GreaterThanOrEqualTo, + Assign, + LeftParen, + RightParen, + LeftBrace, + RightBrace, + LeftBracket, + RightBracket, + Comma, + Dot, + Range, + Ampersand, + Pipe, + Caret, + Tilde, + + // Misc + Comment, + Unknown, }; - - -typedef struct { - TokenType type; - char* value; -} Token; - -typedef struct { - Token* tokens; - size_t count; - size_t capacity; -} TokenList; - -TokenList* create_token_list(); -void append_token(TokenList* list, Token token); -void free_tokens(TokenList* list); - -#endif \ No newline at end of file From f69e52a1901d002735d5bab86c35d7aa9e68315d Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:28:27 +0000 Subject: [PATCH 46/71] Fixed small bugs in string literal test case (lexer) --- tests/lexer_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index f135ef1..f002b81 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -76,7 +76,7 @@ TEST(LexerTests, StringLiterals) { "\"Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|\"" }; - const std::array validTokens = { + const std::array validTokens = { Token(TokenType::StringLiteral, "Enter username: "), Token(TokenType::StringLiteral, "This is a string with a escape characters \" \n \t "), Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|") @@ -85,7 +85,7 @@ TEST(LexerTests, StringLiterals) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_string_literal(input[i]); + Token t = lexer.lex_string(input[i]); ASSERT_TRUE(validTokens[i] == t); } } From 6172882a22875db3d4df1d5a0c20721b5d01e4c9 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:29:03 +0000 Subject: [PATCH 47/71] Replaced test cases for operators and punctuation, with a single test case for all symbols --- tests/lexer_test.cpp | 67 ++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index f002b81..8c6c4b2 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -113,42 +113,53 @@ TEST(LexerTests, Integer) { } } -// Test for operators -TEST(LexerTests, Operators) { - const std::array input = { - "+", "-", "*", "/", "=" - }; - const std::array validTokens = { +// Test for all symbols +TEST(LexerTests, Symbols) { + const std::array input = { + "+", + "-", + "*", + "/", + "=", + "==", + "!=", + "<", + ">", + "<=", + ">=", + "&&", + "||", + "!", + "&", + "|", + "^", + "~" + }; + const std::array validTokens = { Token(TokenType::Plus, "+"), Token(TokenType::Minus, "-"), - Token(TokenType::Asterisk, "*"), + Token(TokenType::Star, "*"), Token(TokenType::Slash, 
"/"), - Token(TokenType::Equal, "=") - }; - - Lexer lexer; - - for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_operator(input[i]); - ASSERT_TRUE(validTokens[i] == t); - } -} - -// Test for punctuation -TEST(LexerTests, Punctuation) { - const std::array input = { - "{", "}", ";" - }; - const std::array validTokens = { - Token(TokenType::LeftBrace, "{"), - Token(TokenType::RightBrace, "}"), - Token(TokenType::Semicolon, ";") + Token(TokenType::Assign, "="), + Token(TokenType::Equals, "=="), + Token(TokenType::NotEquals, "!="), + Token(TokenType::LessThan, "<"), + Token(TokenType::GreaterThan, ">"), + Token(TokenType::LessThanOrEqualTo, "<="), + Token(TokenType::GreaterThanOrEqualTo, ">="), + Token(TokenType::And, "&&"), + Token(TokenType::Or, "||"), + Token(TokenType::Not, "!"), + Token(TokenType::Ampersand, "&"), + Token(TokenType::Pipe, "|"), + Token(TokenType::Caret, "^"), + Token(TokenType::Tilde, "~") }; Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_punctuation(input[i]); + Token t = lexer.lex_symbol(input[i]); ASSERT_TRUE(validTokens[i] == t); } } From b78d5bb0c00abeab1b05ca52b4e02a8aa222cc31 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:29:44 +0000 Subject: [PATCH 48/71] Small changes, mostly renames and switches to std::vector from std::array were necessary --- tests/lexer_test.cpp | 66 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 8c6c4b2..4162e98 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -207,7 +207,7 @@ TEST(LexerTests, Arithmetic) { "1 * 2", "1 / 2" }; - const std::array, 4> validTokens = { + const std::vector> validTokens = { { Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), @@ -247,7 +247,7 @@ TEST(LexerTests, Boolean) { "true == false" "true != false" }; - const std::array, 4> validTokens 
= { + const std::vector> validTokens = { { Token(TokenType::True, "true"), Token(TokenType::And, "&&"), @@ -259,12 +259,12 @@ TEST(LexerTests, Boolean) { Token(TokenType::False, "false") }, { - Token(TokenType::Bang, "!"), + Token(TokenType::Not, "!"), Token(TokenType::True, "true") }, { Token(TokenType::True, "true"), - Token(TokenType::EqualEqual, "=="), + Token(TokenType::Equals, "=="), Token(TokenType::False, "false") } }; @@ -285,25 +285,25 @@ TEST(LexerTests, Relational) { "1 <= 2", "1 >= 2" }; - const std::array, 4> validTokens = { + const std::vector> validTokens = { { Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, "<"), Token(TokenType::IntegerLiteral, "2") }, { Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Greater, ">"), + Token(TokenType::GreaterThan, ">"), Token(TokenType::IntegerLiteral, "2") }, { Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::LessEqual, "<="), + Token(TokenType::LessThanOrEqualTo, "<="), Token(TokenType::IntegerLiteral, "2") }, { Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::GreaterEqual, ">="), + Token(TokenType::LessThanOrEqualTo, ">="), Token(TokenType::IntegerLiteral, "2") } }; @@ -324,7 +324,7 @@ TEST(LexerTests, Bitwise) { "1 ^ 2", "~1" }; - const std::array, 4> validTokens = { + const std::vector> validTokens = { { Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Ampersand, "&"), @@ -367,20 +367,20 @@ TEST(LexerTests, Mixed) { Token(TokenType::IntegerLiteral, "4"), Token(TokenType::Minus, "-"), Token(TokenType::IntegerLiteral, "5"), - Token(TokenType::EqualEqual, "=="), - Token(TokenType::Bang, "!"), + Token(TokenType::Equals, "=="), + Token(TokenType::Not, "!"), Token(TokenType::True, "true"), Token(TokenType::And, "&&"), Token(TokenType::IntegerLiteral, "7"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, "<"), Token(TokenType::IntegerLiteral, "8"), Token(TokenType::Or, "||"), Token(TokenType::IntegerLiteral, "9"), - 
Token(TokenType::Greater, ">"), + Token(TokenType::GreaterThan, ">"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::And, "&&"), Token(TokenType::IntegerLiteral, "11"), - Token(TokenType::LessEqual, "<="), + Token(TokenType::LessThanOrEqualTo, "<="), Token(TokenType::IntegerLiteral, "12"), Token(TokenType::Pipe, "|"), Token(TokenType::IntegerLiteral, "13"), @@ -419,7 +419,7 @@ TEST (LexerTests, VariableDeclarationWithExpr) { Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "2") @@ -437,7 +437,7 @@ TEST(LexerTests, VariableDeclarationWithoutType) { const std::vector validTokens = { Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "2") @@ -457,7 +457,7 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), Token(TokenType::Identifier, "int"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "2") @@ -492,7 +492,7 @@ TEST(LexerTests, MutableVariableDeclarationWithoutType) { Token(TokenType::Let, "let"), Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "2") @@ -509,7 +509,7 @@ TEST(LexerTests, Assignment) { const std::string input = "variable = 1 + 2"; const std::vector validTokens = { Token(TokenType::Identifier, "variable"), - Token(TokenType::Equal, "="), + 
Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "2") @@ -569,11 +569,11 @@ TEST(LexerTests, WhileLoop) { const std::vector validTokens = { Token(TokenType::While, "while"), Token(TokenType::Identifier, "x"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, "<"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -598,7 +598,7 @@ TEST(LexerTests, ForLoop) { Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "i"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "i"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -627,7 +627,7 @@ TEST(LexerTests, ForLoopWithExpr) { Token(TokenType::RightParen, ")"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "i"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "i"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -646,11 +646,11 @@ TEST(LexerTests, IfStatement) { const std::vector validTokens = { Token(TokenType::If, "if"), Token(TokenType::Identifier, "x"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, "<"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -669,11 +669,11 @@ TEST(LexerTests, ElseIfStatement) { const std::vector validTokens = { Token(TokenType::If, "if"), Token(TokenType::Identifier, "x"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, 
"<"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -681,11 +681,11 @@ TEST(LexerTests, ElseIfStatement) { Token(TokenType::Else, "else"), Token(TokenType::If, "if"), Token(TokenType::Identifier, "x"), - Token(TokenType::Greater, ">"), + Token(TokenType::GreaterThan, ">"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Minus, "-"), Token(TokenType::IntegerLiteral, "1"), @@ -704,11 +704,11 @@ TEST(LexerTests, ElseStatement) { const std::vector validTokens = { Token(TokenType::If, "if"), Token(TokenType::Identifier, "x"), - Token(TokenType::Less, "<"), + Token(TokenType::LessThan, "<"), Token(TokenType::IntegerLiteral, "10"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), @@ -716,7 +716,7 @@ TEST(LexerTests, ElseStatement) { Token(TokenType::Else, "else"), Token(TokenType::LeftBrace, "{"), Token(TokenType::Identifier, "x"), - Token(TokenType::Equal, "="), + Token(TokenType::Assign, "="), Token(TokenType::Identifier, "x"), Token(TokenType::Minus, "-"), Token(TokenType::IntegerLiteral, "1"), From c40c66d6b11885264a06a3e80740efa608912cf4 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:09:07 +0000 Subject: [PATCH 49/71] Removed token.cpp from CMake --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c0cb5..236b8a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-10,7 +10,6 @@ include_directories(include) set(SOURCES src/lexer.cpp - src/token.cpp src/parser.cpp src/codegen.cpp src/semantics.cpp From 519696bd90f0918984918370ea134fa03735f7d4 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:09:23 +0000 Subject: [PATCH 50/71] Removed token.cpp from CMake test file --- tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ac473c9..4a1be1a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,7 +5,6 @@ target_include_directories(run_tests PRIVATE ../include) target_sources(run_tests PRIVATE ../src/lexer.cpp - ../src/token.cpp ../src/parser.cpp ../src/semantics.cpp ) From 94d797ba90d09ca8f70ddb7202140b4cc13ee2b7 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:11:19 +0000 Subject: [PATCH 51/71] Added token_type_to_string for nicer error messages and debugging --- include/dragon/token.h | 166 +++++++++++++++++++++++++++-------------- 1 file changed, 110 insertions(+), 56 deletions(-) diff --git a/include/dragon/token.h b/include/dragon/token.h index bd77ce9..36c4b92 100644 --- a/include/dragon/token.h +++ b/include/dragon/token.h @@ -1,62 +1,6 @@ #pragma once #include -#include - -enum class TokenType; - -class Token { -public: - TokenType type; - std::string value; - size_t line; - size_t column; - - Token(TokenType type, std::string value, size_t line, size_t column) { - this->type = type; - this->value = value; - this->line = line; - this->column = column; - } - - Token(TokenType type, std::string value) { - this->type = type; - this->value = value; - this->line = 0; - this->column = 0; - } - - Token(TokenType type) { - this->type = type; - this->value = ""; - this->line = 0; - this->column = 0; - } - - inline bool operator==(const Token& other) const { - return this->type == other.type && this->value == other.value; - } - - inline 
bool operator!=(const Token& other) const { - return this->type != other.type || this->value != other.value; - } - - inline std::string to_string() { - if (this->line == 0 && this->column == 0 && this->value == "") { - return std::format("Token({})", this->type); - } - - if (this->line == 0 && this->column == 0) { - return std::format("Token({}, {})", this->type, this->value); - } - - if (this->value == "") { - return std::format("Token({}, {}, {}, {})", this->type, this->line, this->column); - } - - return std::format("Token({}, {}, {}, {})", this->type, this->value, this->line, this->column); - } -}; enum class TokenType { // Keywords @@ -108,3 +52,113 @@ enum class TokenType { Comment, Unknown, }; + +inline std::string token_type_to_string(TokenType type) { + switch (type) { + case TokenType::Let: return "Let"; + case TokenType::Mut: return "Mut"; + case TokenType::If: return "If"; + case TokenType::Else: return "Else"; + case TokenType::While: return "While"; + case TokenType::For: return "For"; + case TokenType::In: return "In"; + case TokenType::True: return "True"; + case TokenType::False: return "False"; + case TokenType::IntegerLiteral: return "IntegerLiteral"; + case TokenType::StringLiteral: return "StringLiteral"; + case TokenType::Identifier: return "Identifier"; + case TokenType::Plus: return "Plus"; + case TokenType::Minus: return "Minus"; + case TokenType::Star: return "Star"; + case TokenType::Slash: return "Slash"; + case TokenType::And: return "And"; + case TokenType::Or: return "Or"; + case TokenType::Not: return "Not"; + case TokenType::Equals: return "Equals"; + case TokenType::NotEquals: return "NotEquals"; + case TokenType::LessThan: return "LessThan"; + case TokenType::GreaterThan: return "GreaterThan"; + case TokenType::LessThanOrEqualTo: return "LessThanOrEqualTo"; + case TokenType::GreaterThanOrEqualTo: return "GreaterThanOrEqualTo"; + case TokenType::Assign: return "Assign"; + case TokenType::LeftParen: return "LeftParen"; + case 
TokenType::RightParen: return "RightParen"; + case TokenType::LeftBrace: return "LeftBrace"; + case TokenType::RightBrace: return "RightBrace"; + case TokenType::LeftBracket: return "LeftBracket"; + case TokenType::RightBracket: return "RightBracket"; + case TokenType::Comma: return "Comma"; + case TokenType::Dot: return "Dot"; + case TokenType::Range: return "Range"; + case TokenType::Ampersand: return "Ampersand"; + case TokenType::Pipe: return "Pipe"; + case TokenType::Caret: return "Caret"; + case TokenType::Tilde: return "Tilde"; + case TokenType::Comment: return "Comment"; + case TokenType::Unknown: return "Unknown"; + default: return "Unknown"; + } +} + +class Token { +public: + TokenType type; + std::string value; + size_t line; + size_t column; + + Token(TokenType type, std::string value, size_t line, size_t column) { + this->type = type; + this->value = value; + this->line = line; + this->column = column; + } + + Token(TokenType type, std::string value) { + this->type = type; + this->value = value; + this->line = 0; + this->column = 0; + } + + Token(TokenType type) { + this->type = type; + this->value = ""; + this->line = 0; + this->column = 0; + } + + inline bool operator==(const Token& other) const { + return this->type == other.type && this->value == other.value; + } + + inline bool operator!=(const Token& other) const { + return this->type != other.type || this->value != other.value; + } + + inline std::string to_string() { + std::stringstream ss; + ss << "Token(" << token_type_to_string(this->type); + if (!this->value.empty()) { + ss << ", " << this->value; + } + if (this->line != 0 || this->column != 0) { + ss << ", " << this->line << ", " << this->column; + } + ss << ")"; + return ss.str(); + } +}; + +inline std::string token_vector_to_string(std::vector tokens) { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < tokens.size(); i++) { + ss << tokens[i].to_string(); + if (i < tokens.size() - 1) { + ss << ", "; + } + } + ss << "]"; + 
return ss.str(); +} \ No newline at end of file From 3f5e8a78a9263e405812a78d58a8cd10c67ba534 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:25:36 +0000 Subject: [PATCH 52/71] Improved debug error messages for numerous test cases (lexer) --- tests/lexer_test.cpp | 128 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 16 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 4162e98..db3bcc0 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -394,7 +394,13 @@ TEST(LexerTests, Mixed) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let @@ -409,7 +415,13 @@ TEST(LexerTests, VariableDeclarationWithoutExpr) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let = @@ -428,7 +440,13 @@ TEST (LexerTests, VariableDeclarationWithExpr) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " 
<< input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let = @@ -446,7 +464,13 @@ TEST(LexerTests, VariableDeclarationWithoutType) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let mut = @@ -466,7 +490,13 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let mut @@ -482,7 +512,13 @@ TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // let mut = @@ -501,7 +537,13 @@ TEST(LexerTests, MutableVariableDeclarationWithoutType) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input 
+ << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // = @@ -518,7 +560,13 @@ TEST(LexerTests, Assignment) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // @@ -538,7 +586,13 @@ TEST(LexerTests, Arguments) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // () @@ -560,7 +614,13 @@ TEST(LexerTests, FunctionCall) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // while { } @@ -583,7 +643,13 @@ TEST(LexerTests, WhileLoop) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), 
tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // for in { } @@ -608,7 +674,13 @@ TEST(LexerTests, ForLoop) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // for in { } @@ -637,7 +709,13 @@ TEST(LexerTests, ForLoopWithExpr) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // if { } @@ -660,7 +738,13 @@ TEST(LexerTests, IfStatement) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // else if { } @@ -695,7 +779,13 @@ TEST(LexerTests, ElseIfStatement) { std::vector tokens = 
lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } // else { } @@ -726,5 +816,11 @@ TEST(LexerTests, ElseStatement) { std::vector tokens = lexer.lex(input); - ASSERT_TRUE(validTokens == tokens); + ASSERT_EQ(validTokens.size(), tokens.size()) + << "Failed on input: " << input + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens.size() << ")"; + + ASSERT_EQ(validTokens, tokens) + << "Failed on input: " << input + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } \ No newline at end of file From 207229eeaddc5711ad8e1bd0776e83a7f7bc9945 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:26:27 +0000 Subject: [PATCH 53/71] Renamed all expression test cases to be more descriptive --- tests/lexer_test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index db3bcc0..7fb2ae8 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -200,7 +200,7 @@ TEST(LexerTests, MultiLineComments) { // // Arithmetic -TEST(LexerTests, Arithmetic) { +TEST(LexerTests, ArithmeticExpression) { const std::array input = { "1 + 2", "1 - 2", @@ -239,7 +239,7 @@ TEST(LexerTests, Arithmetic) { // // Boolean -TEST(LexerTests, Boolean) { +TEST(LexerTests, BooleanExpression) { const std::array input = { "true && false", "true || false", @@ -278,7 +278,7 @@ TEST(LexerTests, Boolean) { // // Relational -TEST(LexerTests, Relational) { +TEST(LexerTests, RelationalExpression) { const 
std::array input = { "1 < 2", "1 > 2", @@ -317,7 +317,7 @@ TEST(LexerTests, Relational) { // // bitwise -TEST(LexerTests, Bitwise) { +TEST(LexerTests, BitwiseExpression) { const std::array input = { "1 & 2", "1 | 2", @@ -355,7 +355,7 @@ TEST(LexerTests, Bitwise) { // // Mixed -TEST(LexerTests, Mixed) { +TEST(LexerTests, MixedExpression) { const std::string input = "1 + 2 * 3 / 4 - 5 == !true && 7 < 8 || 9 > 10 && 11 <= 12 | 13 & 14 ^ 15"; const std::vector validTokens = { Token(TokenType::IntegerLiteral, "1"), From 465a19002973aabe32e12b74daf65e7eab405c47 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:27:26 +0000 Subject: [PATCH 54/71] Improved error debugging messages for and --- tests/lexer_test.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 7fb2ae8..3e867a6 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -31,8 +31,15 @@ TEST(LexerTests, Identifier) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_identifier(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } @@ -63,8 +70,15 @@ TEST(LexerTests, Keywords) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_identifier(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: 
" << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } From b2b5207dc232e84bef61ff5b8e0c64775fb4dcb3 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:28:02 +0000 Subject: [PATCH 55/71] String and integer literals have updated debug messages --- tests/lexer_test.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 3e867a6..274024b 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -99,8 +99,15 @@ TEST(LexerTests, StringLiterals) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_string(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } @@ -122,8 +129,15 @@ TEST(LexerTests, Integer) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_number(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } From 8f67d1068735a54b164c54abdf3fdec1c15c437a Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:28:43 +0000 Subject: [PATCH 56/71] Both single, and multi, line comments now provide better error messages 
(testing) --- tests/lexer_test.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 274024b..6ea43ad 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -204,8 +204,15 @@ TEST(LexerTests, SingleLineComments) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_comment(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } @@ -221,8 +228,15 @@ TEST(LexerTests, MultiLineComments) { Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_comment(input[i]); - ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } From 6d52c1e84f4ced08b32c8b2747b3398835b5f79a Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:29:15 +0000 Subject: [PATCH 57/71] Bug fixes and minor changes to some test cases --- tests/lexer_test.cpp | 370 +++++++++++++++++++++---------------------- 1 file changed, 178 insertions(+), 192 deletions(-) diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp index 6ea43ad..39be1e6 100644 --- a/tests/lexer_test.cpp +++ b/tests/lexer_test.cpp @@ -1,32 +1,26 @@ #include #include - +#include +#include +#include #include 
"dragon/lexer.h" #include "dragon/token.h" // TEST(LexerTests, Identifier) { - const std::array input = { - "x", + const std::vector input = { + "x", "x_y123", "reallyLongVariableNameWithNoNumbersOrUnderscores", "U_ND_ER_SCO_RES", "____starting___with__underscore", - "2thisShouldError_", // Identifiers cannot start with a number - "this should also error", // Variables cannot contain spaces - "Error?", // Identifiers cannot contain question marks - "#*&$£!!!", // Identifiers cannot contain any of these symbols }; - const std::vector validTokens = { - Token(TokenType::Identifier, "x"), - Token(TokenType::Identifier, "x_y123"), - Token(TokenType::Identifier, "reallyLongVariableNameWithNoNumbersOrUnderscores"), - Token(TokenType::Identifier, "U_ND_ER_SCO_RES"), - Token(TokenType::Identifier, "____starting___with__underscore"), - Token(TokenType::Unknown, "2thisShouldError_"), - Token(TokenType::Unknown, "this should also error"), - Token(TokenType::Unknown, "Error?"), - Token(TokenType::Unknown, "#*&$£!!!") + std::vector validTokens = { + Token(TokenType::Identifier, "x"), + Token(TokenType::Identifier, "x_y123"), + Token(TokenType::Identifier, "reallyLongVariableNameWithNoNumbersOrUnderscores"), + Token(TokenType::Identifier, "U_ND_ER_SCO_RES"), + Token(TokenType::Identifier, "____starting___with__underscore"), }; Lexer lexer; @@ -45,7 +39,7 @@ TEST(LexerTests, Identifier) { // TEST(LexerTests, Keywords) { - const std::array input = { + const std::vector input = { "let", "mut", "if", @@ -56,7 +50,7 @@ TEST(LexerTests, Keywords) { "false", "while", }; - const std::vector validTokens = { + std::vector validTokens = { Token(TokenType::Let, "let"), Token(TokenType::Mut, "mut"), Token(TokenType::If, "if"), @@ -79,22 +73,21 @@ TEST(LexerTests, Keywords) { ASSERT_EQ(validTokens[i], tokens[0]) << "Failed on input: " << input[i] << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; - } + } } // TEST(LexerTests, 
StringLiterals) { - const std::array input = { + const std::vector input = { "\"Enter username: \"", "\"This is a string with a escape characters \\\" \\n \\t \"", "\"Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|\"" }; - const std::array validTokens = { + std::vector validTokens = { Token(TokenType::StringLiteral, "Enter username: "), Token(TokenType::StringLiteral, "This is a string with a escape characters \" \n \t "), - Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|") - }; + Token(TokenType::StringLiteral, "Abcdefghijklmnopqrstuvwxyz @#][{};;@'><,.//?)(*&^%$£1234567890+_-=`¬\\|")}; Lexer lexer; @@ -113,18 +106,17 @@ TEST(LexerTests, StringLiterals) { // TEST(LexerTests, Integer) { - const std::array input = { + const std::vector input = { "1", "123", "0", - "1_000_000", // Underscores are allowed, but are ignored + "1_000_000", // Underscores are allowed, but are ignored }; - const std::array validTokens = { + std::vector validTokens = { Token(TokenType::IntegerLiteral, "1"), Token(TokenType::IntegerLiteral, "123"), Token(TokenType::IntegerLiteral, "0"), - Token(TokenType::IntegerLiteral, "1_000_000") - }; + Token(TokenType::IntegerLiteral, "1000000")}; Lexer lexer; @@ -143,7 +135,7 @@ TEST(LexerTests, Integer) { // Test for all symbols TEST(LexerTests, Symbols) { - const std::array input = { + const std::vector input = { "+", "-", "*", @@ -161,9 +153,8 @@ TEST(LexerTests, Symbols) { "&", "|", "^", - "~" - }; - const std::array validTokens = { + "~"}; + std::vector validTokens = { Token(TokenType::Plus, "+"), Token(TokenType::Minus, "-"), Token(TokenType::Star, "*"), @@ -181,25 +172,30 @@ TEST(LexerTests, Symbols) { Token(TokenType::Ampersand, "&"), Token(TokenType::Pipe, "|"), Token(TokenType::Caret, "^"), - Token(TokenType::Tilde, "~") - }; + Token(TokenType::Tilde, "~")}; Lexer lexer; for (size_t i = 0; i < input.size(); i++) { - Token t = lexer.lex_symbol(input[i]); - 
ASSERT_TRUE(validTokens[i] == t); + std::vector tokens = lexer.lex(input[i]); + + ASSERT_EQ(tokens.size(), 1) + << "Failed on input: " << input[i] + << " ( recieved a size of " << tokens.size() << ", expected a size of 1)"; + + ASSERT_EQ(validTokens[i], tokens[0]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << validTokens[i].to_string() << ")"; } } +// // Test for single-line comments TEST(LexerTests, SingleLineComments) { - const std::array input = { - "// This is a comment" - }; - const std::array validTokens = { - Token(TokenType::Comment, "// This is a comment") - }; + const std::vector input = { + "// This is a comment"}; + std::vector validTokens = { + Token(TokenType::Comment, "// This is a comment")}; Lexer lexer; @@ -216,14 +212,13 @@ TEST(LexerTests, SingleLineComments) { } } +// // Test for multi-line comments TEST(LexerTests, MultiLineComments) { - const std::array input = { - "/* This is a multi-line comment */" - }; - const std::array validTokens = { - Token(TokenType::Comment, "/* This is a multi-line comment */") - }; + const std::vector input = { + "/* This is a multi-line comment */"}; + std::vector validTokens = { + Token(TokenType::Comment, "/* This is a multi-line comment */")}; Lexer lexer; @@ -240,165 +235,172 @@ TEST(LexerTests, MultiLineComments) { } } -// +// // Arithmetic TEST(LexerTests, ArithmeticExpression) { - const std::array input = { + const std::vector input = { "1 + 2", "1 - 2", "1 * 2", - "1 / 2" - }; + "1 / 2"}; const std::vector> validTokens = { - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Minus, "-"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Star, "*"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - 
Token(TokenType::Slash, "/"), - Token(TokenType::IntegerLiteral, "2") - } - }; + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Plus, "+"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Minus, "-"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Star, "*"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Slash, "/"), + Token(TokenType::IntegerLiteral, "2")}}; Lexer lexer; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { std::vector tokens = lexer.lex(input[i]); - ASSERT_TRUE(validTokens[i] == tokens); + + ASSERT_EQ(validTokens[i].size(), tokens.size()) + << "Failed on input: " << (input[i]) + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")"; + + for (size_t j = 0; j < tokens.size(); j++) + { + ASSERT_EQ(validTokens[i][j], tokens[j]) + << "Failed on input: " << (input[i]) + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")"; + } } } -// +// // Boolean TEST(LexerTests, BooleanExpression) { - const std::array input = { + const std::vector input = { "true && false", "true || false", "!true", - "true == false" - "true != false" - }; + "true == false", + "true != false"}; const std::vector> validTokens = { - { - Token(TokenType::True, "true"), - Token(TokenType::And, "&&"), - Token(TokenType::False, "false") - }, - { - Token(TokenType::True, "true"), - Token(TokenType::Or, "||"), - Token(TokenType::False, "false") - }, - { - Token(TokenType::Not, "!"), - Token(TokenType::True, "true") - }, - { - Token(TokenType::True, "true"), - Token(TokenType::Equals, "=="), - Token(TokenType::False, "false") - } - }; + {Token(TokenType::True, "true"), + Token(TokenType::And, "&&"), + Token(TokenType::False, "false")}, + 
{Token(TokenType::True, "true"), + Token(TokenType::Or, "||"), + Token(TokenType::False, "false")}, + {Token(TokenType::Not, "!"), + Token(TokenType::True, "true")}, + {Token(TokenType::True, "true"), + Token(TokenType::Equals, "=="), + Token(TokenType::False, "false")}, + {Token(TokenType::True, "true"), + Token(TokenType::NotEquals, "!="), + Token(TokenType::False, "false")}}; Lexer lexer; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { std::vector tokens = lexer.lex(input[i]); - ASSERT_TRUE(validTokens[i] == tokens); + + ASSERT_EQ(validTokens[i].size(), tokens.size()) + << "Failed on input: " << (input[i]) + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")"; + + for (size_t j = 0; j < tokens.size(); j++) + { + ASSERT_EQ(validTokens[i][j], tokens[j]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")"; + } } } -// +// // Relational TEST(LexerTests, RelationalExpression) { - const std::array input = { + const std::vector input = { "1 < 2", "1 > 2", "1 <= 2", - "1 >= 2" - }; + "1 >= 2"}; const std::vector> validTokens = { - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::LessThan, "<"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::GreaterThan, ">"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::LessThanOrEqualTo, "<="), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::LessThanOrEqualTo, ">="), - Token(TokenType::IntegerLiteral, "2") - } - }; + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::LessThan, "<"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::GreaterThan, ">"), + 
Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::LessThanOrEqualTo, "<="), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::GreaterThanOrEqualTo, ">="), + Token(TokenType::IntegerLiteral, "2")}}; Lexer lexer; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { std::vector tokens = lexer.lex(input[i]); - ASSERT_TRUE(validTokens[i] == tokens); + + ASSERT_EQ(validTokens[i].size(), tokens.size()) + << "Failed on input: " << (input[i]) + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")"; + + for (size_t j = 0; j < tokens.size(); j++) + { + ASSERT_EQ(validTokens[i][j], tokens[j]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")"; + } } } -// +// // bitwise TEST(LexerTests, BitwiseExpression) { - const std::array input = { + const std::vector input = { "1 & 2", "1 | 2", "1 ^ 2", - "~1" - }; + "~1"}; const std::vector> validTokens = { - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Ampersand, "&"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Pipe, "|"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::Caret, "^"), - Token(TokenType::IntegerLiteral, "2") - }, - { - Token(TokenType::Tilde, "~"), - Token(TokenType::IntegerLiteral, "1") - } - }; + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Ampersand, "&"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Pipe, "|"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::IntegerLiteral, "1"), + Token(TokenType::Caret, "^"), + Token(TokenType::IntegerLiteral, "2")}, + {Token(TokenType::Tilde, "~"), + 
Token(TokenType::IntegerLiteral, "1")}}; Lexer lexer; - for (size_t i = 0; i < input.size(); i++) { + for (size_t i = 0; i < input.size(); i++) + { std::vector tokens = lexer.lex(input[i]); - ASSERT_TRUE(validTokens[i] == tokens); + + ASSERT_EQ(validTokens[i].size(), tokens.size()) + << "Failed on input: " << (input[i]) + << " ( recieved a size of " << tokens.size() << ", expected a size of " << validTokens[i].size() << ")"; + + for (size_t j = 0; j < tokens.size(); j++) + { + ASSERT_EQ(validTokens[i][j], tokens[j]) + << "Failed on input: " << input[i] + << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens[i]) << ")"; + } } } // // Mixed TEST(LexerTests, MixedExpression) { - const std::string input = "1 + 2 * 3 / 4 - 5 == !true && 7 < 8 || 9 > 10 && 11 <= 12 | 13 & 14 ^ 15"; + const std::string input = "1 + 2 * 3 / 4 - 5 == !true && 7 < 8 || 9 > 10 && 11 <= 12 | 13 & 14 ^ 15"; const std::vector validTokens = { Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), @@ -429,8 +431,7 @@ TEST(LexerTests, MixedExpression) { Token(TokenType::Ampersand, "&"), Token(TokenType::IntegerLiteral, "14"), Token(TokenType::Caret, "^"), - Token(TokenType::IntegerLiteral, "15") - }; + Token(TokenType::IntegerLiteral, "15")}; Lexer lexer; @@ -448,11 +449,10 @@ TEST(LexerTests, MixedExpression) { // let TEST(LexerTests, VariableDeclarationWithoutExpr) { const std::string input = "let variable int"; - const std::vector validTokens = { + const std::vector validTokens = { Token(TokenType::Let, "let"), Token(TokenType::Identifier, "variable"), - Token(TokenType::Identifier, "int") - }; + Token(TokenType::Identifier, "int")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -467,7 +467,7 @@ TEST(LexerTests, VariableDeclarationWithoutExpr) { } // let = -TEST (LexerTests, VariableDeclarationWithExpr) { +TEST(LexerTests, VariableDeclarationWithExpr) { const std::string input = "let variable int = 1 + 2"; const 
std::vector validTokens = { Token(TokenType::Let, "let"), @@ -476,8 +476,7 @@ TEST (LexerTests, VariableDeclarationWithExpr) { Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }; + Token(TokenType::IntegerLiteral, "2")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -500,8 +499,7 @@ TEST(LexerTests, VariableDeclarationWithoutType) { Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }; + Token(TokenType::IntegerLiteral, "2")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -526,8 +524,7 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }; + Token(TokenType::IntegerLiteral, "2")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -541,15 +538,14 @@ TEST(LexerTests, MutableVariableDeclarationWithExpr) { << " ( recieved: " << token_vector_to_string(tokens) << ", expected: " << token_vector_to_string(validTokens) << ")"; } -// let mut +// let mut TEST(LexerTests, MutableVariableDeclarationWithoutExpr) { const std::string input = "let mut variable int"; const std::vector validTokens = { Token(TokenType::Let, "let"), Token(TokenType::Mut, "mut"), Token(TokenType::Identifier, "variable"), - Token(TokenType::Identifier, "int") - }; + Token(TokenType::Identifier, "int")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -573,8 +569,7 @@ TEST(LexerTests, MutableVariableDeclarationWithoutType) { Token(TokenType::Assign, "="), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }; + Token(TokenType::IntegerLiteral, "2")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -596,8 +591,7 @@ TEST(LexerTests, Assignment) { Token(TokenType::Assign, "="), 
Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "2") - }; + Token(TokenType::IntegerLiteral, "2")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -622,8 +616,7 @@ TEST(LexerTests, Arguments) { Token(TokenType::Comma, ","), Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), - Token(TokenType::IntegerLiteral, "3") - }; + Token(TokenType::IntegerLiteral, "3")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -650,8 +643,7 @@ TEST(LexerTests, FunctionCall) { Token(TokenType::IntegerLiteral, "1"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "3"), - Token(TokenType::RightParen, ")") - }; + Token(TokenType::RightParen, ")")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -679,8 +671,7 @@ TEST(LexerTests, WhileLoop) { Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -710,8 +701,7 @@ TEST(LexerTests, ForLoop) { Token(TokenType::Identifier, "i"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -745,8 +735,7 @@ TEST(LexerTests, ForLoopWithExpr) { Token(TokenType::Identifier, "i"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -774,8 +763,7 @@ TEST(LexerTests, IfStatement) { Token(TokenType::Identifier, "x"), Token(TokenType::Plus, "+"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -815,8 +803,7 @@ TEST(LexerTests, ElseIfStatement) { 
Token(TokenType::Identifier, "x"), Token(TokenType::Minus, "-"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); @@ -852,8 +839,7 @@ TEST(LexerTests, ElseStatement) { Token(TokenType::Identifier, "x"), Token(TokenType::Minus, "-"), Token(TokenType::IntegerLiteral, "1"), - Token(TokenType::RightBrace, "}") - }; + Token(TokenType::RightBrace, "}")}; Lexer lexer; std::vector tokens = lexer.lex(input); From f646200595479af6cf19b04155a57883397925b7 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:29:46 +0000 Subject: [PATCH 58/71] Setting up lexer class declaration --- include/dragon/lexer.h | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/include/dragon/lexer.h b/include/dragon/lexer.h index 6eca3de..c0e2ae0 100644 --- a/include/dragon/lexer.h +++ b/include/dragon/lexer.h @@ -1,24 +1,9 @@ -#ifndef LEXER_H -#define LEXER_H +#pragma once +#include +#include +#include +#include #include "token.h" -#include -#include -#include -#include -typedef struct { - char* source; - size_t position; -} Lexer; - -Lexer* create_lexer(const char* source); -Token lex_number(Lexer* lexer); -Token lex_identifier(Lexer* lexer); -Token lex_symbol(Lexer* lexer); -Token lex_string(Lexer* lexer); -Token is_keyword(Token token); -TokenList* tokenise(const char* source); -void free_lexer(Lexer* lexer); - -#endif // LEXER_H \ No newline at end of file +class Lexer { From 91a492fe6c7ba7f15b9f9775c9458d1734f56b2f Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:30:08 +0000 Subject: [PATCH 59/71] Added all methods and data for lexer class --- include/dragon/lexer.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/dragon/lexer.h b/include/dragon/lexer.h index c0e2ae0..159acd8 
100644 --- a/include/dragon/lexer.h +++ b/include/dragon/lexer.h @@ -7,3 +7,32 @@ #include "token.h" class Lexer { +public: + Lexer() = default; + Lexer(std::string input); + + std::vector lex(std::string input); + std::vector lex(); + + void reset(); + + Token lex_identifier(); + Token lex_number(); + Token lex_string(); + Token lex_symbol(); + Token lex_single_line_comment(); + Token lex_multi_line_comment(); + + TokenType get_keyword(std::string input); + +private: + std::vector tokens; + std::string input; + size_t index = 0; + size_t line = 1; + size_t column = 1; + + std::optional peek() const; + std::optional peek_next() const; + std::optional advance(); +}; \ No newline at end of file From 51f8fda88da9f566ec935dd659ff5804a8a72065 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:30:40 +0000 Subject: [PATCH 60/71] Implemented lexer constructor --- src/lexer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index d2a3fcf..f45e988 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -1,10 +1,11 @@ #include "dragon/lexer.h" -Lexer* create_lexer(const char* source) { - Lexer* lexer = (Lexer *)malloc(sizeof(Lexer)); - lexer->source = strdup(source); - lexer->position = 0; - return lexer; +#include + +Lexer::Lexer(std::string input) { + this->input = input; +} + } void free_lexer(Lexer* lexer) { From 2bb94230e69abdc87af3ae7d5298ab205ad6f895 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:30:59 +0000 Subject: [PATCH 61/71] Implemented lexer lex(string) --- src/lexer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index f45e988..ef1eab7 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -6,11 +6,11 @@ Lexer::Lexer(std::string input) { this->input = input; } +std::vector Lexer::lex(std::string input) { + if (!this->input.empty()) 
this->reset(); + this->input = input; + return this->lex(); } - -void free_lexer(Lexer* lexer) { - free(lexer->source); - free(lexer); } TokenList* tokenise(const char* source) { From 27a2c3a928e4c21b7a28f38a9f38f9dd87edf70a Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:31:12 +0000 Subject: [PATCH 62/71] Implemented lexer reset --- src/lexer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index ef1eab7..5099e1d 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -11,13 +11,13 @@ std::vector Lexer::lex(std::string input) { this->input = input; return this->lex(); } -} - -TokenList* tokenise(const char* source) { - TokenList* tokens = create_token_list(); - if (source == NULL || strlen(source) == 0) { - return tokens; +void Lexer::reset() { + this->tokens.clear(); + this->input = ""; + this->index = 0; + this->line = 1; + this->column = 1; } Lexer* lexer = create_lexer(source); From 0c7bc6765f31f08ca7688a092874fb911ee340a3 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:31:35 +0000 Subject: [PATCH 63/71] Implemented lexer peek, peek_next, and advance --- src/lexer.cpp | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 5099e1d..6065498 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -158,21 +158,33 @@ Token lex_string(Lexer* lexer) { lexer->position++; } - size_t length = lexer->position - start; - char* value = strndup(lexer->source + start, length); - lexer->position++; +std::optional Lexer::peek() const { + if (this->index < this->input.size()) { + return this->input[this->index]; + } + return std::nullopt; +} - Token token = {TOKEN_STRING, value}; - return token; +std::optional Lexer::peek_next() const { + if (this->index + 1 < this->input.size()) { + return this->input[this->index + 1]; + 
} + return std::nullopt; } -Token is_keyword(Token token) { - for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { - if (strcmp(token.value, keywords[i]) == 0) { - token.type = (TokenType)i; - return token; +std::optional Lexer::advance() { + if (this->index < this->input.size()) { + char c = this->input[this->index]; + this->index++; + this->column++; + + if (c == '\n') { + this->line++; + this->column = 1; } + + return c; } - return token; -} + return std::nullopt; +} \ No newline at end of file From 81a06e8b16f5a9d92f0bb250d9fe72121ae19af6 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:31:49 +0000 Subject: [PATCH 64/71] Added get_keyword --- src/lexer.cpp | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 6065498..e0c31ca 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -149,15 +149,45 @@ Token lex_symbol(Lexer* lexer) { return token; } -Token lex_string(Lexer* lexer) { - char quote = lexer->source[lexer->position]; - lexer->position++; +TokenType Lexer::get_keyword(std::string value) { + if (value == "let") { + return TokenType::Let; + } - size_t start = lexer->position; - while (lexer->source[lexer->position] != quote) { - lexer->position++; + if (value == "mut") { + return TokenType::Mut; + } + + if (value == "if") { + return TokenType::If; + } + + if (value == "else") { + return TokenType::Else; + } + + if (value == "while") { + return TokenType::While; } + if (value == "for") { + return TokenType::For; + } + + if (value == "in") { + return TokenType::In; + } + + if (value == "true") { + return TokenType::True; + } + + if (value == "false") { + return TokenType::False; + } + + return TokenType::Identifier; + } std::optional Lexer::peek() const { if (this->index < this->input.size()) { From 531c9e71a6b3d2ac824f923af1dd940d65d70a30 Mon Sep 17 00:00:00 2001 From: hrszpuk 
<107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:32:12 +0000 Subject: [PATCH 65/71] Implemented main lex function :D --- src/lexer.cpp | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index e0c31ca..df42ace 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -20,33 +20,48 @@ void Lexer::reset() { this->column = 1; } - Lexer* lexer = create_lexer(source); +std::vector Lexer::lex() { + while (this->index < this->input.size()) { + auto opt_c = this->peek(); + if (!opt_c.has_value()) break; + char c = opt_c.value(); + std::cout << "lexing starting with: " << c << std::endl; + + if (std::isspace(c)) { + this->advance(); + continue; + } - while (lexer->position < strlen(lexer->source)) { - char c = lexer->source[lexer->position]; + if (std::isalpha(c) || c == '_') { + this->tokens.push_back(this->lex_identifier()); + continue; + } - while (c == ' ' || c == '\n' || c == '\t') { - lexer->position++; - c = lexer->source[lexer->position]; + if (std::isdigit(c)) { + this->tokens.push_back(this->lex_number()); + continue; } - Token token = {TOKEN_INVALID, NULL}; - if (isdigit(c)) { - token = lex_number(lexer); - } else if (isalpha(c)) { - token = lex_identifier(lexer); - } else if (c == '"' || c == '\'') { - token = lex_string(lexer); - } else { - token = lex_symbol(lexer); + if (c == '"') { + std::cout << "lexing string" << std::endl; + this->tokens.push_back(this->lex_string()); + continue; } - append_token(tokens, token); + if (c == '/' && this->peek_next() == '/') { + this->tokens.push_back(this->lex_single_line_comment()); + continue; + } + + if (c == '/' && this->peek_next() == '*') { + this->tokens.push_back(this->lex_multi_line_comment()); + continue; } - free_lexer(lexer); + this->tokens.push_back(this->lex_symbol()); + } - return tokens; + return this->tokens; } Token lex_number(Lexer* lexer) { From 8ee6fec7cefe8c3ece286ec60017fd44b5a41c8c Mon Sep 
17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:32:27 +0000 Subject: [PATCH 66/71] Implemented lex_identifier --- src/lexer.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index df42ace..cbc469d 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -64,10 +64,24 @@ std::vector Lexer::lex() { return this->tokens; } -Token lex_number(Lexer* lexer) { - size_t start = lexer->position; - while (isdigit(lexer->source[lexer->position])) { - lexer->position++; +Token Lexer::lex_identifier() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; + + while (true) { + auto opt_c = this->peek(); + if (!opt_c.has_value() || !(std::isalnum(opt_c.value()) || opt_c.value() == '_')) { + break; + } + value += this->advance().value(); + std::cout << "building value: " << value << std::endl; + } + + TokenType type = this->get_keyword(value); + std::cout << "type: " << token_type_to_string(type) << std::endl; + std::cout << "value: " << "\"" << value << "\"" << std::endl; + return Token(type, value, line, column); } size_t length = lexer->position - start; From 702612ec7b786bae79f56ec053168d2d1bfe5ee8 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:32:39 +0000 Subject: [PATCH 67/71] Implemented lex_number --- src/lexer.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index cbc469d..9b451a9 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -84,11 +84,24 @@ Token Lexer::lex_identifier() { return Token(type, value, line, column); } - size_t length = lexer->position - start; - char* value = strndup(lexer->source + start, length); +Token Lexer::lex_number() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; - Token token = {TOKEN_INTEGER, value}; - return token; + 
while (true) { + auto opt_c = this->peek(); + if (opt_c.has_value() && opt_c.value() == '_') { + this->advance(); + continue; + } + if (!opt_c.has_value() || !std::isdigit(opt_c.value())) { + break; + } + value += this->advance().value(); + } + + return Token(TokenType::IntegerLiteral, value, line, column); } Token lex_identifier(Lexer* lexer) { From 996fc55622330d9621304769f166bf0bad1e6734 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:33:04 +0000 Subject: [PATCH 68/71] Implemented lex_string --- src/lexer.cpp | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 9b451a9..0efdd4d 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -104,20 +104,40 @@ Token Lexer::lex_number() { return Token(TokenType::IntegerLiteral, value, line, column); } -Token lex_identifier(Lexer* lexer) { - size_t start = lexer->position; - while (isalnum(lexer->source[lexer->position])) { - lexer->position++; - } +Token Lexer::lex_string() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; - size_t length = lexer->position - start; - char* value = strndup(lexer->source + start, length); + this->advance(); // Skip the opening quote - Token token = {TOKEN_IDENTIFIER, value}; + while (true) { + auto opt_c = this->peek(); + if (!opt_c.has_value() || opt_c.value() == '"') { + break; + } + + if (opt_c.value() == '\\') { + this->advance(); // Skip the backslash + auto escaped_char = this->advance(); + if (escaped_char.has_value()) { + switch (escaped_char.value()) { + case 'n': value += '\n'; break; + case 't': value += '\t'; break; + case 'r': value += '\r'; break; + case '\\': value += '\\'; break; + case '"': value += '"'; break; + default: value += '\\'; value += escaped_char.value(); break; + } + } + } else { + value += this->advance().value(); + } + } - token = is_keyword(token); + this->advance(); // Skip 
the closing quote - return token; + return Token(TokenType::StringLiteral, value, line, column); } Token lex_symbol(Lexer* lexer) { From 9d58782c52c94ef15e46f95e5efcb3ec75959049 Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:33:25 +0000 Subject: [PATCH 69/71] Implemented lex_symbol --- src/lexer.cpp | 138 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 40 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 0efdd4d..4f73d2e 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -140,53 +140,111 @@ Token Lexer::lex_string() { return Token(TokenType::StringLiteral, value, line, column); } -Token lex_symbol(Lexer* lexer) { - char c = lexer->source[lexer->position]; - TokenType type = TOKEN_INVALID; - char* value = NULL; +Token Lexer::lex_symbol() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; + + auto opt_c = this->peek(); + if (!opt_c.has_value()) { + return Token(TokenType::Unknown, value, line, column); + } + char c = opt_c.value(); switch (c) { - case '=': - type = TOKEN_EQUALS; - value = strndup(lexer->source + lexer->position, 1); - break; case '+': - type = TOKEN_PLUS; - value = strndup(lexer->source + lexer->position, 1); - break; - case '{': - type = TOKEN_BRACE_OPEN; - value = strndup(lexer->source + lexer->position, 1); - break; - case '}': - type = TOKEN_BRACE_CLOSE; - value = strndup(lexer->source + lexer->position, 1); - break; + value += this->advance().value(); + return Token(TokenType::Plus, value, line, column); + case '-': + value += this->advance().value(); + return Token(TokenType::Minus, value, line, column); + case '*': + value += this->advance().value(); + return Token(TokenType::Star, value, line, column); + case '/': + value += this->advance().value(); + return Token(TokenType::Slash, value, line, column); + case '!': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() 
&& next.value() == '=') { + value += this->advance().value(); + return Token(TokenType::NotEquals, value, line, column); + } + return Token(TokenType::Not, value, line, column); + case '=': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '=') { + value += this->advance().value(); + return Token(TokenType::Equals, value, line, column); + } + return Token(TokenType::Assign, value, line, column); + case '<': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '=') { + value += this->advance().value(); + return Token(TokenType::LessThanOrEqualTo, value, line, column); + } + return Token(TokenType::LessThan, value, line, column); + case '>': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '=') { + value += this->advance().value(); + return Token(TokenType::GreaterThanOrEqualTo, value, line, column); + } + return Token(TokenType::GreaterThan, value, line, column); + case '&': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '&') { + value += this->advance().value(); + return Token(TokenType::And, value, line, column); + } + return Token(TokenType::Ampersand, value, line, column); + case '|': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '|') { + value += this->advance().value(); + return Token(TokenType::Or, value, line, column); + } + return Token(TokenType::Pipe, value, line, column); + case '^': + value += this->advance().value(); + return Token(TokenType::Caret, value, line, column); + case '~': + value += this->advance().value(); + return Token(TokenType::Tilde, value, line, column); case '(': - type = TOKEN_PAREN_OPEN; - value = strndup(lexer->source + lexer->position, 1); - break; + value += this->advance().value(); + return Token(TokenType::LeftParen, value, line, column); case ')': - type = 
TOKEN_PAREN_CLOSE; - value = strndup(lexer->source + lexer->position, 1); - break; + value += this->advance().value(); + return Token(TokenType::RightParen, value, line, column); + case '{': + value += this->advance().value(); + return Token(TokenType::LeftBrace, value, line, column); + case '}': + value += this->advance().value(); + return Token(TokenType::RightBrace, value, line, column); + case '[': + value += this->advance().value(); + return Token(TokenType::LeftBracket, value, line, column); + case ']': + value += this->advance().value(); + return Token(TokenType::RightBracket, value, line, column); case ',': - type = TOKEN_COMMA; - value = strndup(lexer->source + lexer->position, 1); - break; - case '>': - type = TOKEN_GRT; - value = strndup(lexer->source + lexer->position, 1); - break; - case '-': - if (lexer->source[lexer->position + 1] == '>') { - type = TOKEN_RIGHT_ARROW; - value = strndup(lexer->source + lexer->position, 2); - lexer->position++; - } else { - type = TOKEN_INVALID; - value = strndup(lexer->source + lexer->position, 1); + value += this->advance().value(); + return Token(TokenType::Comma, value, line, column); + case '.': + value += this->advance().value(); + if (auto next = this->peek(); next.has_value() && next.value() == '.') { + value += this->advance().value(); + return Token(TokenType::Range, value, line, column); } + return Token(TokenType::Dot, value, line, column); + default: + value += this->advance().value(); + return Token(TokenType::Unknown, value, line, column); + } +} break; case '.': printf("Next char: %c\n", lexer->source[lexer->position + 1]); From 1442bfac67fd4311e7cc24c4040c98fa9e54001e Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:33:43 +0000 Subject: [PATCH 70/71] Implemented lex_single_line_comment for // comment --- src/lexer.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp 
index 4f73d2e..2042b92 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -245,17 +245,22 @@ Token Lexer::lex_symbol() { return Token(TokenType::Unknown, value, line, column); } } + +Token Lexer::lex_single_line_comment() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; + + while (true) { + auto opt_c = this->peek(); + if (!opt_c.has_value() || opt_c.value() == '\n') { break; - case '.': - printf("Next char: %c\n", lexer->source[lexer->position + 1]); - if (lexer->source[lexer->position + 1] == '.') { - type = TOKEN_RANGE; - value = strndup(lexer->source + lexer->position, 2); - lexer->position++; - } else { - type = TOKEN_INVALID; - value = strndup(lexer->source + lexer->position, 1); - } + } + value += this->advance().value(); + } + + return Token(TokenType::Comment, value, line, column); +} break; default: type = TOKEN_INVALID; From 068737b9232cdfd745f4d34b7f50b3a6b15d81bb Mon Sep 17 00:00:00 2001 From: hrszpuk <107559570+hrszpuk@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:34:09 +0000 Subject: [PATCH 71/71] Implemented lex_multi_line_comment --- src/lexer.cpp | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index 2042b92..860da36 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -18,7 +18,7 @@ void Lexer::reset() { this->index = 0; this->line = 1; this->column = 1; - } +} std::vector Lexer::lex() { while (this->index < this->input.size()) { @@ -56,7 +56,7 @@ std::vector Lexer::lex() { if (c == '/' && this->peek_next() == '*') { this->tokens.push_back(this->lex_multi_line_comment()); continue; - } + } this->tokens.push_back(this->lex_symbol()); } @@ -82,7 +82,7 @@ Token Lexer::lex_identifier() { std::cout << "type: " << token_type_to_string(type) << std::endl; std::cout << "value: " << "\"" << value << "\"" << std::endl; return Token(type, value, line, column); - } +} Token Lexer::lex_number() { std::string value = ""; @@ -261,17 +261,27 
@@ Token Lexer::lex_single_line_comment() { return Token(TokenType::Comment, value, line, column); } + +Token Lexer::lex_multi_line_comment() { + std::string value = ""; + size_t line = this->line; + size_t column = this->column; + + while (true) { + auto opt_c = this->peek(); + auto opt_next_c = this->peek_next(); + if (!opt_c.has_value() || !opt_next_c.has_value()) { break; - default: - type = TOKEN_INVALID; - value = strndup(lexer->source + lexer->position, 1); + } + if (opt_c.value() == '*' && opt_next_c.value() == '/') { + value += this->advance().value(); + value += this->advance().value(); break; + } + value += this->advance().value(); } - lexer->position++; - - Token token = {type, value}; - return token; + return Token(TokenType::Comment, value, line, column); } TokenType Lexer::get_keyword(std::string value) { @@ -312,7 +322,7 @@ TokenType Lexer::get_keyword(std::string value) { } return TokenType::Identifier; - } +} std::optional Lexer::peek() const { if (this->index < this->input.size()) {