Skip to content

Commit

Permalink
Merge pull request #16 from dragonfly-lang/dev
Browse files Browse the repository at this point in the history
Lexical analyser / Scanner fully implemented including tests
  • Loading branch information
hrszpuk authored Jan 5, 2025
2 parents 899d889 + ff31051 commit 11a4517
Show file tree
Hide file tree
Showing 14 changed files with 1,316 additions and 763 deletions.
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@ include_directories(include)

set(SOURCES
src/lexer.cpp
src/token.cpp
src/parser.cpp
src/vm.cpp
src/codegen.cpp
src/semantics.cpp
src/main.cpp
)

add_executable(dragon src/main.cpp ${SOURCES})
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Dragon
# Dragonfly Compiler
A high-level multi-paradigm programming language.

```
Expand Down
1 change: 0 additions & 1 deletion examples/Common Programming Concepts/Operators.drg
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
& | ^
> >= < <=
>> <<
~
? :
*/
58 changes: 36 additions & 22 deletions include/dragon/lexer.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
#ifndef LEXER_H
#define LEXER_H
#pragma once

#include <string>
#include <vector>
#include <sstream>
#include <optional>
#include "token.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>

typedef struct {
char* source;
size_t position;
} Lexer;

Lexer* create_lexer(const char* source);
Token lex_number(Lexer* lexer);
Token lex_identifier(Lexer* lexer);
Token lex_symbol(Lexer* lexer);
Token lex_string(Lexer* lexer);
Token is_keyword(Token token);
TokenList* tokenise(const char* source);
void free_lexer(Lexer* lexer);

#endif // LEXER_H

// Lexical analyser (scanner) interface. Holds an input string, a list of
// tokens and a cursor (index / line / column), and declares methods that
// return Token values.
// NOTE(review): only declarations are visible in this header; the behaviour
// notes below are inferred from names and must be confirmed against the
// implementation file.
class Lexer {
public:
// Default construction: `tokens` and `input` start empty and the cursor uses
// the in-class initialisers below (index 0, line 1, column 1).
Lexer() = default;
// Constructs a lexer from `input` — presumably stored as the text to scan (TODO confirm).
Lexer(std::string input);

// Returns a vector of tokens. The overload taking `input` presumably scans
// that text instead of the stored one — TODO confirm how it interacts with
// previously stored state.
std::vector<Token> lex(std::string input);
std::vector<Token> lex();

// Presumably restores the lexer to its initial state (cursor and token list) — TODO confirm.
void reset();

// Per-category scanning routines; each returns a single Token.
// NOTE(review): these are public, so callers can invoke them directly;
// whether they expect the cursor to sit on the first character of the
// lexeme is not visible here — confirm before calling outside lex().
Token lex_identifier();
Token lex_number();
Token lex_string();
Token lex_symbol();
Token lex_single_line_comment();
Token lex_multi_line_comment();

// Maps `input` to a TokenType. Presumably returns the keyword's type when
// `input` is a keyword; the result for non-keywords is not visible here — TODO confirm.
TokenType get_keyword(std::string input);

private:
// Token storage — presumably the tokens produced so far (TODO confirm).
std::vector<Token> tokens;
// Text held by the lexer.
std::string input;
// Cursor state: `index` starts at 0, `line` and `column` start at 1.
// NOTE(review): `index` looks like an offset into `input` — confirm.
size_t index = 0;
size_t line = 1;
size_t column = 1;

// Character-access helpers returning std::optional<char>; an empty optional
// presumably signals end of input (TODO confirm). peek()/peek_next() are
// const and therefore cannot modify the lexer; advance() is non-const and
// presumably moves the cursor.
std::optional<char> peek() const;
std::optional<char> peek_next() const;
std::optional<char> advance();
};
272 changes: 153 additions & 119 deletions include/dragon/token.h
Original file line number Diff line number Diff line change
@@ -1,130 +1,164 @@
#ifndef TOKEN_H
#define TOKEN_H
#pragma once

#include <stddef.h>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

typedef enum {
enum class TokenType {
// Keywords
TOKEN_LET_KW, // "let"
TOKEN_MUT_KW, // "mut"
TOKEN_INT_KW, // "int"
TOKEN_IF_KW, // "if"
TOKEN_ELSE_KW, // "else"
TOKEN_FOR_KW, // "for"
TOKEN_IN_KW, // "in"
TOKEN_FUNC_KW, // "func"
TOKEN_RETURN_KW, // "return"
TOKEN_WHILE_KW, // "while"
TOKEN_TRUE_KW, // "true"
TOKEN_FALSE_KW, // "false"
TOKEN_BOOL_KW, // "bool"
TOKEN_BREAK_KW, // "break"
TOKEN_CONTINUE_KW, // "continue"
TOKEN_STRUCT_KW, // "struct"
TOKEN_ENUM_KW, // "enum"
TOKEN_TYPE_KW, // "type"
TOKEN_MATCH_KW, // "match"
TOKEN_IMPORT_KW, // "import"
TOKEN_AS_KW, // "as"
Let,
Mut,
If,
Else,
While,
For,
In,
True,
False,

// Literals
TOKEN_INTEGER, // 123
TOKEN_FLOAT, // 123.45
TOKEN_IDENTIFIER, // variable_name
TOKEN_STRING, // "string"
TOKEN_CHAR, // 'c'
IntegerLiteral,
StringLiteral,
Identifier,

// Symbols
TOKEN_EQUALS, // =
TOKEN_PLUS, // +
TOKEN_MINUS, // -
TOKEN_ASTERISK, // *
TOKEN_SLASH, // /
TOKEN_MODULO, // %
TOKEN_AND, // &&
TOKEN_OR, // ||
TOKEN_NOT, // !
TOKEN_EQUALITY, // ==
TOKEN_NOT_EQ, // !=
TOKEN_GRT, // >
TOKEN_LSS, // <
TOKEN_LTE, // <=
TOKEN_GTE, // >=
TOKEN_LSHIFT, // <<
TOKEN_RSHIFT, // >>
TOKEN_AMPERSAND, // &
TOKEN_PIPE, // |
TOKEN_CARET, // ^
TOKEN_TILDE, // ~
TOKEN_BRACE_OPEN, // {
TOKEN_BRACE_CLOSE, // }
TOKEN_PAREN_OPEN, // (
TOKEN_PAREN_CLOSE, // )
TOKEN_COMMA, // ,
TOKEN_SEMICOLON, // ;
TOKEN_COLON, // :
TOKEN_DOT, // .
TOKEN_RANGE, // ..
TOKEN_DOUBLE_RIGHT_ARROW, // =>
TOKEN_RIGHT_ARROW, // ->

// Misc
TOKEN_COMMENT, // Comment
TOKEN_EOF, // End of file
TOKEN_INVALID // Invalid token
} TokenType;

static const char* keywords[] = {
// Variable Declarations
"let",
"mut",

// Data Types
"int",
"float",
"bool",
"char",

// Control Flow
"if",
"else",
"for",
"in",
"while",
"break",
"continue",

// Boolean Literals
"true",
"false",

// Functions
"func",
"return",

// Modules and Types
"import",
"struct",
"enum",
"type",
"match",
"as"
Plus,
Minus,
Star,
Slash,
And,
Or,
Not,
Equals,
NotEquals,
LessThan,
GreaterThan,
LessThanOrEqualTo,
GreaterThanOrEqualTo,
Assign,
LeftParen,
RightParen,
LeftBrace,
RightBrace,
LeftBracket,
RightBracket,
Comma,
Dot,
Range,
Ampersand,
Pipe,
Caret,
Tilde,

// Misc
Comment,
Unknown,
};


typedef struct {
/// Returns the enumerator name of `type` as a string
/// (e.g. TokenType::Let -> "Let").
///
/// The switch intentionally has no `default:` label: every enumerator is
/// listed explicitly so that compilers with switch-exhaustiveness warnings
/// (e.g. -Wswitch) flag this function when a new TokenType is added.
/// A value that matches no enumerator falls through to the trailing return
/// and yields "Unknown".
inline std::string token_type_to_string(TokenType type) {
    switch (type) {
        // Keywords
        case TokenType::Let: return "Let";
        case TokenType::Mut: return "Mut";
        case TokenType::If: return "If";
        case TokenType::Else: return "Else";
        case TokenType::While: return "While";
        case TokenType::For: return "For";
        case TokenType::In: return "In";
        case TokenType::True: return "True";
        case TokenType::False: return "False";

        // Literals
        case TokenType::IntegerLiteral: return "IntegerLiteral";
        case TokenType::StringLiteral: return "StringLiteral";
        case TokenType::Identifier: return "Identifier";

        // Symbols
        case TokenType::Plus: return "Plus";
        case TokenType::Minus: return "Minus";
        case TokenType::Star: return "Star";
        case TokenType::Slash: return "Slash";
        case TokenType::And: return "And";
        case TokenType::Or: return "Or";
        case TokenType::Not: return "Not";
        case TokenType::Equals: return "Equals";
        case TokenType::NotEquals: return "NotEquals";
        case TokenType::LessThan: return "LessThan";
        case TokenType::GreaterThan: return "GreaterThan";
        case TokenType::LessThanOrEqualTo: return "LessThanOrEqualTo";
        case TokenType::GreaterThanOrEqualTo: return "GreaterThanOrEqualTo";
        case TokenType::Assign: return "Assign";
        case TokenType::LeftParen: return "LeftParen";
        case TokenType::RightParen: return "RightParen";
        case TokenType::LeftBrace: return "LeftBrace";
        case TokenType::RightBrace: return "RightBrace";
        case TokenType::LeftBracket: return "LeftBracket";
        case TokenType::RightBracket: return "RightBracket";
        case TokenType::Comma: return "Comma";
        case TokenType::Dot: return "Dot";
        case TokenType::Range: return "Range";
        case TokenType::Ampersand: return "Ampersand";
        case TokenType::Pipe: return "Pipe";
        case TokenType::Caret: return "Caret";
        case TokenType::Tilde: return "Tilde";

        // Misc
        case TokenType::Comment: return "Comment";
        case TokenType::Unknown: return "Unknown";
    }
    // Reached only for values outside the enumerator list.
    return "Unknown";
}

class Token {
public:
TokenType type;
char* value;
} Token;

typedef struct {
Token* tokens;
size_t count;
size_t capacity;
} TokenList;

TokenList* create_token_list();
void append_token(TokenList* list, Token token);
void free_tokens(TokenList* list);
std::string value;
size_t line;
size_t column;

/// Constructs a token of kind `type` with text `value`, positioned at
/// `line` / `column`.
/// Members are initialised directly instead of being default-constructed and
/// then assigned, and the by-value `value` argument is moved into the member
/// so the string is not copied a second time.
Token(TokenType type, std::string value, size_t line, size_t column)
    : type(type), value(std::move(value)), line(line), column(column) {}

/// Constructs a token of kind `type` with text `value` and no source
/// position: `line` and `column` are both set to 0.
/// The by-value `value` argument is moved into the member rather than copied.
Token(TokenType type, std::string value)
    : type(type), value(std::move(value)), line(0), column(0) {}

/// Constructs a token that carries only a kind: the value is the empty
/// string and both `line` and `column` are 0.
Token(TokenType type) : type(type), value(), line(0), column(0) {}

/// Tokens are equal when both their type and their value match.
/// `line` and `column` take no part in the comparison.
bool operator==(const Token& other) const {
    if (type != other.type) {
        return false;
    }
    return value == other.value;
}

/// Tokens differ when their type or their value differs.
/// `line` and `column` take no part in the comparison.
bool operator!=(const Token& other) const {
    return !(type == other.type && value == other.value);
}

inline std::string to_string() {
std::stringstream ss;
ss << "Token(" << token_type_to_string(this->type);
if (!this->value.empty()) {
ss << ", " << this->value;
}
if (this->line != 0 || this->column != 0) {
ss << ", " << this->line << ", " << this->column;
}
ss << ")";
return ss.str();
}
};

#endif
inline std::string token_vector_to_string(std::vector<Token> tokens) {
std::stringstream ss;
ss << "[";
for (size_t i = 0; i < tokens.size(); i++) {
ss << tokens[i].to_string();
if (i < tokens.size() - 1) {
ss << ", ";
}
}
ss << "]";
return ss.str();
}
Empty file removed include/dragon/vm.h
Empty file.
Loading

0 comments on commit 11a4517

Please sign in to comment.