From d84b8c472e8f6ad3e746da0eaa6228adba75021c Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Wed, 1 Mar 2023 22:54:50 -0300 Subject: [PATCH 01/10] feat(ast): basic concept of the parse and abstract syntax trees Signed-off-by: Daniel Boll --- assets/lang/test.pile | 25 +----- src/lexer/tokens.rs | 35 ++++++-- src/lib.rs | 1 + src/parser/parse.rs | 194 +++++++++++++++++++++++++++++++++++++----- src/semantic/ast.rs | 0 src/semantic/mod.rs | 1 + 6 files changed, 205 insertions(+), 51 deletions(-) create mode 100644 src/semantic/ast.rs create mode 100644 src/semantic/mod.rs diff --git a/assets/lang/test.pile b/assets/lang/test.pile index d05b38c..958b6a2 100644 --- a/assets/lang/test.pile +++ b/assets/lang/test.pile @@ -1,24 +1 @@ -2::i32 - -1 2 + -def(i32) a -1 @a -1 2 + -drop - -while 1 2 drop dup do 1 end - -1 2 = if - 1 -end - -1 2 = if - 1 -else - 2 -end - -def(i32) a -10 range a do - a -end +1 2 3 * + diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs index d320287..b8a3da7 100644 --- a/src/lexer/tokens.rs +++ b/src/lexer/tokens.rs @@ -13,6 +13,27 @@ fn def_type(lex: &mut Lexer) -> Option<&'static str> { } } +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ArithmeticOperators { + Plus, + Minus, + Times, + Divide, + Modulo, +} + +fn parse_arithmetic_op(lex: &mut Lexer) -> Option { + let slice = lex.slice(); + match slice { + "+" => Some(ArithmeticOperators::Plus), + "-" => Some(ArithmeticOperators::Minus), + "*" => Some(ArithmeticOperators::Times), + "/" => Some(ArithmeticOperators::Divide), + "%" => Some(ArithmeticOperators::Modulo), + _ => None, + } +} + #[derive(Logos, Debug, Clone, Copy, PartialEq)] pub enum Token { #[regex(r"[ \t\n\f]+", logos::skip)] @@ -20,11 +41,11 @@ pub enum Token { Error, /// Integer literals - #[regex(r"[0-9]+")] - #[regex(r"0[xX][0-9a-fA-F]+")] - #[regex(r"0b[0-1]+")] - #[regex(r"0o[0-7]+")] - Integer, + #[regex(r"[0-9]+", |lex| lex.slice().parse())] + #[regex(r"0[xX][0-9a-fA-F]+", |lex| i32::from_str_radix(&lex.slice()[2..], 
16))] + #[regex(r"0b[0-1]+", |lex| i32::from_str_radix(&lex.slice()[2..], 2))] + #[regex(r"0o[0-7]+", |lex| i32::from_str_radix(&lex.slice()[2..], 8))] + Integer(i32), /// Float literals #[regex("[0-9]+\\.[0-9]+")] @@ -35,8 +56,8 @@ pub enum Token { /// Operators /// Plus, minus, times, divide, modulo - #[regex(r"\+|-|\*|/|%")] - ArithmeticOp, + #[regex(r"\+|-|\*|/|%", parse_arithmetic_op)] + ArithmeticOp(ArithmeticOperators), /// Comparison operators #[regex(r"=|<>|<=|>=|<|>")] diff --git a/src/lib.rs b/src/lib.rs index 9f9683d..514ea09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,3 +3,4 @@ pub mod lexer; pub mod grammar; pub mod parser; +pub mod semantic; diff --git a/src/parser/parse.rs b/src/parser/parse.rs index 75809d6..d9895a4 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -1,14 +1,66 @@ use miette::Result as MietteResult; -use std::collections::VecDeque; +use std::fmt; +use std::{collections::VecDeque, fmt::Display}; use crate::{ grammar::Symbol, - lexer::{tokens::{Token, span_to_tuple}, PileToken}, + lexer::{ + tokens::{span_to_tuple, Token}, + PileToken, + }, parser::{errors::ParseError, Action}, }; use super::SLR::SLR; +#[derive(Debug)] +pub enum ParseTreeNode { + Terminal(Token), + NonTerminal(Symbol, Vec), +} + +impl fmt::Display for ParseTreeNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "ParseTreeNode")?; + write_node(f, self, "", true) + } +} + +fn write_node( + f: &mut fmt::Formatter, + node: &ParseTreeNode, + prefix: &str, + is_last: bool, +) -> fmt::Result { + let symbol = match node { + ParseTreeNode::Terminal(token) => format!("\x1B[1m{}\x1B[0m", token), + ParseTreeNode::NonTerminal(symbol, _) => symbol.to_string(), + }; + let (node_prefix, child_prefix) = if is_last { + ("\x1B[33m└─\x1B[0m", " ") + } else { + ("\x1B[33m├─\x1B[0m", "\x1B[33m│ \x1B[0m") + }; + writeln!(f, "{}{}{}", prefix, node_prefix, symbol)?; + let child_count = match node { + ParseTreeNode::Terminal(_) => 0, + 
ParseTreeNode::NonTerminal(_, children) => children.len(), + }; + for (i, child) in node_children(node).iter().enumerate() { + let child_prefix = format!("{}{}", prefix, child_prefix); + let is_last = i == child_count - 1; + write_node(f, child, &child_prefix, is_last)?; + } + Ok(()) +} + +fn node_children(node: &ParseTreeNode) -> Vec<&ParseTreeNode> { + match node { + ParseTreeNode::Terminal(_) => vec![], + ParseTreeNode::NonTerminal(_, children) => children.iter().collect(), + } +} + impl SLR { pub fn parse(&self, tokens: Vec, source_code: &str) -> MietteResult<()> { // A type to store either a usize or a Symbol @@ -20,6 +72,7 @@ impl SLR { // The stack let mut stack: VecDeque = VecDeque::new(); + let mut parse_stack: Vec = Vec::new(); stack.push_back(StackItem::State(0)); // The input @@ -32,11 +85,10 @@ impl SLR { if let StackItem::State(state) = top { if next.is_none() { // TODO: No more tokens to parse error - // return Err("No more tokens to parse".to_string()); } let PileToken { token: current_token, - slice, + slice: _, span, } = next.clone().expect("No more tokens to parse"); @@ -46,26 +98,11 @@ impl SLR { Symbol::Terminal(current_token.get_token_type_only()) }; - // print the queue - // print!("Stack: "); - // for item in stack.iter() { - // match item { - // StackItem::State(state) => print!("{} ", state), - // StackItem::Symbol(symbol) => print!("{} ", symbol), - // } - // } - // println!(); - let action = self .action_table .get(&(*state, symbol.clone())) .unwrap_or_else(|| panic!("No action for state {} and symbol {:?}", state, symbol)); - // println!( - // "State: {}, Token: {}, Action: {:?}", - // state, current_token, action - // ); - match action { Action::Shift(shift_state) => { let symbol = if let Token::End = current_token { @@ -73,24 +110,38 @@ impl SLR { } else { Symbol::Terminal(current_token.to_string()) }; + parse_stack.push(ParseTreeNode::Terminal(current_token)); stack.push_back(StackItem::Symbol(symbol)); 
stack.push_back(StackItem::State(*shift_state)); next = input.next(); } Action::Reduce(reduce_state) => { let (lhs, rhs) = &self.grammar.productions[*reduce_state]; + + // Normal stack let mut to_pop = rhs.len() * 2; while to_pop > 0 { stack.pop_back(); to_pop -= 1; } + // Parse stack + let mut children = Vec::new(); + for _ in 0..rhs.len() { + let node = parse_stack.pop().unwrap(); + children.push(node); + } + + children.reverse(); + + let node = ParseTreeNode::NonTerminal(lhs.clone(), children); + parse_stack.push(node); + let top = if let StackItem::State(state) = stack.back().unwrap() { state } else { &(0_usize) // TODO: Stack top is not a state error - // return Err("Stack top is not a state".to_string()); }; let state = self @@ -124,6 +175,10 @@ impl SLR { } } + println!("{}", &parse_stack[0]); + let ast = parse_ast(&parse_stack[0]); + println!("{}", &ast[0]); + Ok(()) } @@ -146,3 +201,102 @@ impl SLR { expected_tokens } } + +// Binary ast node +#[derive(Debug, Clone)] +struct AstNode { + symbol: Symbol, + children: Vec, +} + +impl Display for AstNode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "AST")?; + write_ast_node(f, self, "", true) + } +} + +fn write_ast_node( + f: &mut fmt::Formatter, + node: &AstNode, + prefix: &str, + is_last: bool, +) -> fmt::Result { + let (node_prefix, child_prefix) = if is_last { + ("\x1B[33m└─\x1B[0m", " ") + } else { + ("\x1B[33m├─\x1B[0m", "\x1B[33m│ \x1B[0m") + }; + + writeln!(f, "{}{}{}", prefix, node_prefix, node.symbol)?; + + let children = ast_node_children(node); + for (i, child) in children.iter().enumerate() { + write_ast_node( + f, + child, + &format!("{}{}", prefix, child_prefix), + i == children.len() - 1, + )?; + } + + Ok(()) +} + +fn ast_node_children(node: &AstNode) -> Vec<&AstNode> { + node.children.iter().collect() +} + +fn parse_ast(node: &ParseTreeNode) -> Vec { + // Iterate for each through the leaves of the tree, if the leave is a Integer push it to the + // stack, if 
it is a operator pop the last two elements of the stack and create a new node + // with the operator and the two elements as children. Append the new node to the stack. + + let mut stack: Vec = Vec::new(); + + // Iterate over the tree inorder + let mut traverse_stack: Vec<&ParseTreeNode> = Vec::new(); + let mut current_node = node; + loop { + if let ParseTreeNode::NonTerminal(_, children) = current_node { + for child in children.iter().rev() { + traverse_stack.push(child); + } + } else if let ParseTreeNode::Terminal(token) = current_node { + match token { + Token::Integer(integer) => { + stack.push(AstNode { + symbol: Symbol::Terminal(integer.to_string()), + children: Vec::new(), + }); + } + Token::ArithmeticOp { .. } => { + let right = stack.pop().unwrap(); + let left = stack.pop().unwrap(); + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: vec![left, right], + }); + } + _ => {} + } + } + + if let Some(next_node) = traverse_stack.pop() { + current_node = next_node; + } else { + break; + } + } + + // If there is more than on element on the stack create a new node with the symbol Program + // and the stack as children + if stack.len() > 1 { + stack = vec![AstNode { + symbol: Symbol::NonTerminal("Program".to_string()), + children: stack, + }]; + } + + stack +} diff --git a/src/semantic/ast.rs b/src/semantic/ast.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs new file mode 100644 index 0000000..851c0bc --- /dev/null +++ b/src/semantic/mod.rs @@ -0,0 +1 @@ +pub mod ast; From b6dcd11ee1eceb4c782a159233023c605ce14dce Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Fri, 19 May 2023 14:15:56 -0300 Subject: [PATCH 02/10] feat: add anyhow crate to Cargo.toml feat: add hexadecimal number to test.pile feat: add codegen module to lib.rs feat: add wasm code generator to codegen module feat: add parse_ast function to parse.rs feat: add AstNode struct to parse.rs feat: print abstract syntax tree 
in main.rs refactor: change parse function in parse.rs to return an Option instead of () refactor: change root node symbol in parse_ast function to "R" instead of "Program" --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + assets/lang/test.pile | 1 + src/codegen/mod.rs | 13 +++++++++++++ src/codegen/wasm.rs | 11 +++++++++++ src/lib.rs | 1 + src/main.rs | 5 ++++- src/parser/parse.rs | 22 +++++++++------------- 8 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 src/codegen/mod.rs create mode 100644 src/codegen/wasm.rs diff --git a/Cargo.lock b/Cargo.lock index 6b62872..9639f79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" + [[package]] name = "atty" version = "0.2.14" @@ -269,6 +275,7 @@ checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" name = "rusted-pile" version = "0.1.0" dependencies = [ + "anyhow", "logos", "miette", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index a0c71c0..d4c2195 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,4 @@ edition = "2021" logos = "0.12.1" miette = { version = "5.5.0", features = ["fancy"] } thiserror = "1.0.38" +anyhow = "1.0.69" diff --git a/assets/lang/test.pile b/assets/lang/test.pile index 958b6a2..daff1ca 100644 --- a/assets/lang/test.pile +++ b/assets/lang/test.pile @@ -1 +1,2 @@ 1 2 3 * + +1 0x00 diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 0000000..d5e41e2 --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1,13 @@ +pub trait CodeGenerator { + fn generate(&self) -> anyhow::Result<()>; +} + +pub mod wasm; + +// Choose the code generator based on the target +pub fn choose_code_generator(target: &str) -> Box { + match target { + "wasm" => Box::new(wasm::WasmCodeGenerator {}), + _ => panic!("Unknown target: {}", 
target), + } +} diff --git a/src/codegen/wasm.rs b/src/codegen/wasm.rs new file mode 100644 index 0000000..7ac2ae2 --- /dev/null +++ b/src/codegen/wasm.rs @@ -0,0 +1,11 @@ +// CodeGenerator + +use super::CodeGenerator; + +pub struct WasmCodeGenerator {} + +impl CodeGenerator for WasmCodeGenerator { + fn generate(&self) -> anyhow::Result<()> { + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 514ea09..56f3c3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,3 +4,4 @@ pub mod lexer; pub mod grammar; pub mod parser; pub mod semantic; +pub mod codegen; diff --git a/src/main.rs b/src/main.rs index f29966b..6a42412 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,10 @@ fn main() -> MietteResult<(), Box> { glc.compute_follow_set().expand(); - SLR::new(glc).parse(tokens, &lang_contents)?; + let abstract_syntax_tree = SLR::new(glc).parse(tokens, &lang_contents)?; + if let Some(abstract_syntax_tree) = abstract_syntax_tree { + println!("{}", abstract_syntax_tree); + } Ok(()) } diff --git a/src/parser/parse.rs b/src/parser/parse.rs index d9895a4..aef6add 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -62,7 +62,7 @@ fn node_children(node: &ParseTreeNode) -> Vec<&ParseTreeNode> { } impl SLR { - pub fn parse(&self, tokens: Vec, source_code: &str) -> MietteResult<()> { + pub fn parse(&self, tokens: Vec, source_code: &str) -> MietteResult> { // A type to store either a usize or a Symbol #[derive(Debug, Clone)] enum StackItem { @@ -175,11 +175,9 @@ impl SLR { } } - println!("{}", &parse_stack[0]); let ast = parse_ast(&parse_stack[0]); - println!("{}", &ast[0]); - Ok(()) + Ok(Some(ast[0].clone())) } pub fn find_expected_symbol(&self, state: usize) -> Vec { @@ -204,9 +202,9 @@ impl SLR { // Binary ast node #[derive(Debug, Clone)] -struct AstNode { - symbol: Symbol, - children: Vec, +pub struct AstNode { + pub symbol: Symbol, + pub children: Vec, } impl Display for AstNode { @@ -291,12 +289,10 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { // If there is 
more than on element on the stack create a new node with the symbol Program // and the stack as children - if stack.len() > 1 { - stack = vec![AstNode { - symbol: Symbol::NonTerminal("Program".to_string()), - children: stack, - }]; - } + stack = vec![AstNode { + symbol: Symbol::NonTerminal("R".to_string()), + children: stack, + }]; stack } From 2b24d535fc593a5537a2d521f9b8a1d253a80b3a Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Thu, 25 May 2023 01:01:48 -0300 Subject: [PATCH 03/10] feat(llvm): add support for stack data structure This commit adds support for a stack data structure in LLVM. It includes a new module `globals/stack.rs` that defines the `Stack` struct and its methods. The `Stack` struct has a `stack` field that represents the global array that stores the elements of the stack, a `top` field that represents a pointer to the top of the stack, and a `size` field that represents the maximum size of the stack. The `Stack` struct has methods to check if the stack is full, store an element in the stack, and get the `Stack` instance. The `Stack` instance is created using the `create` method. This commit also adds a new module `codegen/llvm/builtins/abort.rs` that defines the `AbortBuiltinFunction` struct and its methods. The `AbortBuiltinFunction` struct has methods to declare and get the `abort` function. feat: add LLVM code generator Add LLVM code generator to generate LLVM IR from the AST. The LLVM code generator is added as a new module in the `codegen` module. The `LLVMCodeGenerator` struct is defined in the `llvm` module. The `LLVMManager` struct is defined in the `llvm/manager.rs` file. The `LLVMManager` struct is used to manage the LLVM context, module, and builder. The `LLVMCodeGenerator` struct implements the `CodeGenerator` trait. The `generate` method of the `LLVMCodeGenerator` struct takes an AST as input and generates LLVM IR from it. The `choose_code_generator` function is removed and replaced with the `code_generator` function. 
The `code_generator` function takes a `CodeGeneratorTarget` enum as input and returns a `Box`. The `CodeGeneratorTarget` enum has --- Cargo.lock | 235 ++++++++++++++++++++++++++- Cargo.toml | 5 + src/codegen/llvm/builtins/abort.rs | 87 ++++++++++ src/codegen/llvm/builtins/mod.rs | 2 + src/codegen/llvm/builtins/push.rs | 59 +++++++ src/codegen/llvm/externs/exit.rs | 47 ++++++ src/codegen/llvm/externs/mod.rs | 2 + src/codegen/llvm/externs/printf.rs | 37 +++++ src/codegen/llvm/globals/mod.rs | 1 + src/codegen/llvm/globals/stack.rs | 111 +++++++++++++ src/codegen/llvm/manager.rs | 76 +++++++++ src/codegen/llvm/mod.rs | 41 +++++ src/codegen/llvm/operations/mod.rs | 0 src/codegen/llvm/utils/functions.rs | 0 src/codegen/llvm/utils/get_params.rs | 37 +++++ src/codegen/llvm/utils/mod.rs | 2 + src/codegen/mod.rs | 16 +- src/codegen/wasm.rs | 16 +- src/lib.rs | 4 +- src/main.rs | 19 ++- 20 files changed, 777 insertions(+), 20 deletions(-) create mode 100644 src/codegen/llvm/builtins/abort.rs create mode 100644 src/codegen/llvm/builtins/mod.rs create mode 100644 src/codegen/llvm/builtins/push.rs create mode 100644 src/codegen/llvm/externs/exit.rs create mode 100644 src/codegen/llvm/externs/mod.rs create mode 100644 src/codegen/llvm/externs/printf.rs create mode 100644 src/codegen/llvm/globals/mod.rs create mode 100644 src/codegen/llvm/globals/stack.rs create mode 100644 src/codegen/llvm/manager.rs create mode 100644 src/codegen/llvm/mod.rs create mode 100644 src/codegen/llvm/operations/mod.rs create mode 100644 src/codegen/llvm/utils/functions.rs create mode 100644 src/codegen/llvm/utils/get_params.rs create mode 100644 src/codegen/llvm/utils/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9639f79..d66d5cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,6 +54,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "backtrace" version = "0.3.67" @@ -75,6 +81,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "cc" version = "1.0.79" @@ -87,6 +99,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + [[package]] name = "fnv" version = "1.0.7" @@ -128,18 +146,72 @@ dependencies = [ "libc", ] +[[package]] +name = "inkwell" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f4fcb4a4fa0b8f7b4178e24e6317d6f8b95ab500d8e6e1bd4283b6860e369c1" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "parking_lot", +] + +[[package]] +name = "inkwell_internals" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b185e7d068d6820411502efa14d8fbf010750485399402156b72dd2a548ef8e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + [[package]] name = "is_ci" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = 
"libc" version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +[[package]] +name = "llvm-sys" +version = "150.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58b2ce8adf5b4b7f4652994f522ea2639ad388f6ab6b85b229750decf2782d8a" +dependencies = [ + "cc", + "lazy_static", + "libc", + "regex", + "semver", +] + +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "logos" version = "0.12.1" @@ -160,7 +232,7 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax", - "syn", + "syn 1.0.107", ] [[package]] @@ -197,7 +269,7 @@ checksum = "97c2401ab7ac5282ca5c8b518a87635b1a93762b0b90b9990c509888eeccba29" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -230,24 +302,56 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" 
+checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.7.1" @@ -276,11 +380,42 @@ name = "rusted-pile" version = "0.1.0" dependencies = [ "anyhow", + "inkwell", + "lazy_static", + "llvm-sys", "logos", "miette", + "singleton-manager", "thiserror", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" + +[[package]] +name = "singleton-manager" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5c61bd18b12d0cfef0d2fdd8cf0ae8e177be98c9b56dceeacf12f692e9192" +dependencies = [ + "uuid", +] + +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + [[package]] name = "smawk" version = "0.3.1" @@ -326,6 +461,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "terminal_size" version = "0.1.17" @@ -364,7 +510,7 @@ checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -389,6 +535,15 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "uuid" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" +dependencies = [ + "getrandom", +] + [[package]] name = "version_check" version = "0.9.4" @@ -422,3 +577,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" diff --git a/Cargo.toml b/Cargo.toml index d4c2195..0380778 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,8 @@ logos = "0.12.1" miette = { version = "5.5.0", features = ["fancy"] } thiserror = "1.0.38" anyhow = "1.0.69" +singleton-manager = "0.1.4" + +inkwell = { version = "0.2", features = ["llvm15-0"] } +llvm-sys-150 = { package = "llvm-sys", version = "150.1.0", features = ["prefer-dynamic"] } +lazy_static = "1.4" diff --git a/src/codegen/llvm/builtins/abort.rs b/src/codegen/llvm/builtins/abort.rs new file mode 100644 index 0000000..9d42f33 --- /dev/null +++ b/src/codegen/llvm/builtins/abort.rs @@ -0,0 +1,87 @@ +use inkwell::{ + types::FunctionType, + values::{FunctionValue, IntValue, PointerValue}, + AddressSpace, +}; + +use crate::codegen::llvm::{ + externs::{exit::ExitExtern, 
printf::PrintfExtern}, + manager::LLVMManager, + utils::get_params::FunctionParams, +}; + +pub struct AbortBuiltinFunction; + +impl AbortBuiltinFunction { + pub fn declare() { + let manager = LLVMManager::get(); + let (context, module, builder) = manager.fetch_all(); + + let abort_type = context.void_type().fn_type( + &[ + context.i32_type().ptr_type(AddressSpace::default()).into(), // Message + context.i32_type().into(), // Exit code + ], + false, + ); + let abort_func = module.add_function("abort", abort_type, None); + + let basic_block = context.append_basic_block(abort_func, "entry"); + { + builder.position_at_end(basic_block); + + let message_ptr = abort_func.get_param::(0).unwrap(); + let exit_code = abort_func.get_param::(1).unwrap(); + + PrintfExtern::call(&[message_ptr.into()]); + ExitExtern::call_from_int(201); + + builder.build_unreachable(); + } + + builder.build_return(None); + } + + pub fn get() -> FunctionValue<'static> { + let manager = LLVMManager::get(); + let module = manager.module(); + + module.get_function("abort").unwrap_or_else(|| { + Self::declare(); + module + .get_function("abort") + .expect("abort function not found") + }) + } + + pub fn call_from_values( + message: &str, + code: i32, + string_name: Option, + call_name: Option, + ) { + let string_name = string_name.unwrap_or_else(|| "message".to_string()); + let call_name = call_name.unwrap_or_else(|| "abort_call".to_string()); + + let manager = LLVMManager::get(); + let builder = manager.builder(); + + let message_ptr = builder.build_global_string_ptr(message, &string_name); + builder.build_call( + Self::get(), + &[ + message_ptr.as_pointer_value().into(), + manager.const_int(code as u64).into(), + ], + &call_name, + ); + } + + pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { + let manager = LLVMManager::get(); + + manager + .builder() + .build_call(Self::get(), args, "abort_call"); + } +} diff --git a/src/codegen/llvm/builtins/mod.rs b/src/codegen/llvm/builtins/mod.rs 
new file mode 100644 index 0000000..113ebed --- /dev/null +++ b/src/codegen/llvm/builtins/mod.rs @@ -0,0 +1,2 @@ +pub mod push; +pub mod abort; diff --git a/src/codegen/llvm/builtins/push.rs b/src/codegen/llvm/builtins/push.rs new file mode 100644 index 0000000..bc4ae9a --- /dev/null +++ b/src/codegen/llvm/builtins/push.rs @@ -0,0 +1,59 @@ +use inkwell::values::IntValue; + +use crate::codegen::llvm::{ + globals::stack::Stack, manager::LLVMManager, utils::get_params::FunctionParams, +}; + +use super::abort::AbortBuiltinFunction; + +pub fn generate_push_function() { + let manager = LLVMManager::get(); + let (context, module, builder) = manager.fetch_all(); + + let i32_type = context.i32_type(); + let insert_type = context.void_type().fn_type(&[i32_type.into()], false); + let insert_func = module.add_function("push", insert_type, None); + let entry = context.append_basic_block(insert_func, "entry"); + + { + builder.position_at_end(entry); + + // Prelude. Get the value from the first parameter + let value = insert_func + .get_param::(0) + .expect("Expected a value in the first parameter of the push function"); + let stack_manager = Stack::get(); + + // 1. Check if the stack is full + let stack_full_block = context.append_basic_block(insert_func, "stack_full"); + let stack_not_full_block = context.append_basic_block(insert_func, "stack_not_full"); + builder.build_conditional_branch( + stack_manager.is_full(), + stack_full_block, + stack_not_full_block, + ); + + // 2. If the stack is full + { + builder.position_at_end(stack_full_block); + + AbortBuiltinFunction::call_from_values( + "[ABORT @ push]: stack is already full", + 1, + Some("error_message_stack_full".to_string()), + None, + ); + + builder.build_unreachable(); + } + + // 3. 
If not then store the value + { + builder.position_at_end(stack_not_full_block); + + stack_manager.store(value); + } + + builder.build_return(None); + } +} diff --git a/src/codegen/llvm/externs/exit.rs b/src/codegen/llvm/externs/exit.rs new file mode 100644 index 0000000..6b74305 --- /dev/null +++ b/src/codegen/llvm/externs/exit.rs @@ -0,0 +1,47 @@ +use crate::codegen::llvm::manager::LLVMManager; + +pub struct ExitExtern; + +pub const EXTERN_EXIT: &str = "exit"; + +impl ExitExtern { + fn declare() { + let manager = LLVMManager::get(); + let module = manager.module(); + + let exit_type = manager + .void_type() + .fn_type(&[manager.i32_type().into()], false); + module.add_function(EXTERN_EXIT, exit_type, None); + } + + pub fn get() -> inkwell::values::FunctionValue<'static> { + let manager = LLVMManager::get(); + let module = manager.module(); + + module.get_function(EXTERN_EXIT).unwrap_or_else(|| { + ExitExtern::declare(); + module + .get_function(EXTERN_EXIT) + .expect("printf function not found") + }) + } + + pub fn call_from_int(code: i32) { + let manager = LLVMManager::get(); + + manager.builder().build_call( + ExitExtern::get(), + &[manager.const_int(code as u64).into()], + "exit_call", + ); + } + + pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { + let manager = LLVMManager::get(); + + manager + .builder() + .build_call(ExitExtern::get(), args, "exit_call"); + } +} diff --git a/src/codegen/llvm/externs/mod.rs b/src/codegen/llvm/externs/mod.rs new file mode 100644 index 0000000..e0773f0 --- /dev/null +++ b/src/codegen/llvm/externs/mod.rs @@ -0,0 +1,2 @@ +pub mod printf; +pub mod exit; diff --git a/src/codegen/llvm/externs/printf.rs b/src/codegen/llvm/externs/printf.rs new file mode 100644 index 0000000..47dd047 --- /dev/null +++ b/src/codegen/llvm/externs/printf.rs @@ -0,0 +1,37 @@ +use crate::codegen::llvm::manager::LLVMManager; + +pub struct PrintfExtern; + +impl PrintfExtern { + pub fn declare() { + let manager = LLVMManager::get(); + let 
module = manager.module(); + + let printf_type = manager + .i32_type() + .fn_type(&[manager.ptr_i32_type().into()], true); + module.add_function("printf", printf_type, None); + } + + pub fn get() -> inkwell::values::FunctionValue<'static> { + let manager = LLVMManager::get(); + let module = manager.module(); + + module.get_function("printf").unwrap_or_else(|| { + PrintfExtern::declare(); + module + .get_function("printf") + .expect("printf function not found") + }) + } + + pub fn call_from_str(message: &str) {} + + pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { + let manager = LLVMManager::get(); + + manager + .builder() + .build_call(PrintfExtern::get(), args, "printf_call"); + } +} diff --git a/src/codegen/llvm/globals/mod.rs b/src/codegen/llvm/globals/mod.rs new file mode 100644 index 0000000..c93897d --- /dev/null +++ b/src/codegen/llvm/globals/mod.rs @@ -0,0 +1 @@ +pub mod stack; diff --git a/src/codegen/llvm/globals/stack.rs b/src/codegen/llvm/globals/stack.rs new file mode 100644 index 0000000..9e7d43a --- /dev/null +++ b/src/codegen/llvm/globals/stack.rs @@ -0,0 +1,111 @@ +use inkwell::values::{BasicValueEnum, GlobalValue, IntValue, PointerValue}; +use singleton_manager::sm; + +use crate::codegen::llvm::manager::LLVMManager; + +pub struct Stack<'ctx> { + pub stack: GlobalValue<'ctx>, + pub top: GlobalValue<'ctx>, + pub size: u32, +} + +pub const STACK_NAME: &str = "gStack"; +pub const STACK_TOP_PTR_NAME: &str = "gTopPtr"; + +impl<'ctx> Stack<'static> { + pub fn new(size: u32) -> Self { + let manager = LLVMManager::get(); + let module = manager.module(); + + let array_type = manager.array_type(size); + + let g_array = module.add_global(array_type, None, STACK_NAME); + g_array.set_linkage(inkwell::module::Linkage::Internal); + g_array.set_initializer(&array_type.const_zero()); + + let g_top = module.add_global(manager.ptr_i32_type(), None, STACK_TOP_PTR_NAME); + g_top.set_linkage(inkwell::module::Linkage::Internal); + 
g_top.set_initializer(&g_array.as_pointer_value()); + + Self { + stack: g_array, + top: g_top, + size, + } + } + + fn load_top(&self) -> BasicValueEnum { + let manager = LLVMManager::get(); + + manager.builder().build_load( + manager.ptr_i32_type(), + self.top.as_pointer_value(), + STACK_TOP_PTR_NAME, + ) + } + + fn stack_top_ptr(&self) -> PointerValue { + self.top.as_pointer_value() + } + + fn stack_ptr(&self) -> PointerValue { + self.stack.as_pointer_value() + } + + pub fn is_full(&self) -> IntValue { + let manager = LLVMManager::get(); + let builder = manager.builder(); + + let end_of_stack_ptr = unsafe { + builder.build_in_bounds_gep( + manager.i32_type(), + self.stack_ptr(), + &[manager.const_int(self.size as u64)], + "nextTopPtr", + ) + }; + + builder.build_int_compare( + inkwell::IntPredicate::EQ, + self.stack_top_ptr(), + end_of_stack_ptr, + "isFull", + ) + } + + // Store in the stack + pub fn store(&self, value: IntValue) { + let manager = LLVMManager::get(); + let builder = manager.builder(); + + // Load gTop into the gTop variable + let ptr = self.stack_top_ptr(); + + // Store the element at the position where the top is pointing to + builder.build_store(ptr, value); + + // Increment the top + let next_ptr = unsafe { + builder.build_in_bounds_gep( + manager.i32_type(), + ptr, + &[manager.i32_type().const_int(1, false)], + "nextTopPtr", + ) + }; + + builder.build_store(ptr, next_ptr); + } + + pub fn get() -> &'static mut Self { + sm() + .get::("Stack") + .expect("Failed to get Stack. 
Probably not created yet.") + } + + pub fn create(size: u32) { + sm() + .set("Stack", Self::new(size)) + .expect("Failed to create Stack"); + } +} diff --git a/src/codegen/llvm/manager.rs b/src/codegen/llvm/manager.rs new file mode 100644 index 0000000..60ee320 --- /dev/null +++ b/src/codegen/llvm/manager.rs @@ -0,0 +1,76 @@ +use inkwell::{builder::Builder, context::Context, module::Module}; +use singleton_manager::sm; +use std::sync::{Arc, Mutex}; + +pub struct LLVMManager<'ctx> { + pub guard: Mutex<()>, + pub context: Arc>, + pub module: Module<'ctx>, + pub builder: Builder<'ctx>, +} + +impl<'ctx> LLVMManager<'ctx> { + pub fn new(context: Arc>) -> Self { + let context_clone = Arc::clone(&context); + let module = context_clone.lock().unwrap().create_module("main"); + + let context_clone = Arc::clone(&context); + let builder = context_clone.lock().unwrap().create_builder(); + + Self { + guard: Mutex::new(()), + context, + module, + builder, + } + } + + pub fn context(&self) -> &Context { + &self.context.lock().unwrap() + } + + pub fn module(&self) -> &Module<'ctx> { + &self.module + } + + pub fn builder(&self) -> &Builder<'ctx> { + &self.builder + } + + pub fn fetch_all(&self) -> (&'ctx Context, &Module<'ctx>, &Builder<'ctx>) { + (&self.context(), &self.module, &self.builder) + } + + // Helper functions for types and stuff + pub fn i32_type(&self) -> inkwell::types::IntType<'ctx> { + self.context().i32_type() + } + + pub fn ptr_i32_type(&self) -> inkwell::types::PointerType<'ctx> { + self.i32_type().ptr_type(inkwell::AddressSpace::default()) + } + + pub fn void_type(&self) -> inkwell::types::VoidType<'ctx> { + self.context().void_type() + } + + pub fn array_type(&self, size: u32) -> inkwell::types::ArrayType<'ctx> { + self.i32_type().array_type(size) + } + + pub fn const_int(&self, value: u64) -> inkwell::values::IntValue<'ctx> { + self.i32_type().const_int(value, false) + } + + pub fn get() -> &'static mut Self { + sm() + .get::("LLVMManager") + .expect("Failed to 
get LLVMManager. Probably not created yet.") + } + + pub fn create(context: Arc>) { + sm() + .set("LLVMManager", LLVMManager::new(context)) + .expect("Failed to create LLVMManager"); + } +} diff --git a/src/codegen/llvm/mod.rs b/src/codegen/llvm/mod.rs new file mode 100644 index 0000000..a49cb0e --- /dev/null +++ b/src/codegen/llvm/mod.rs @@ -0,0 +1,41 @@ +use std::{rc::Rc, sync::{Arc, Mutex}}; + +use inkwell::context::Context; + +use crate::parser::parse::AstNode; + +use self::{globals::stack::Stack, manager::LLVMManager}; + +use super::CodeGenerator; + +pub mod builtins; +pub mod externs; +pub mod globals; +pub mod manager; +pub mod operations; +pub mod utils; + +pub struct LLVMCodeGenerator {} + +impl LLVMCodeGenerator { + pub fn new() -> Self { + Self {} + } +} + +impl Default for LLVMCodeGenerator { + fn default() -> Self { + Self::new() + } +} + +impl CodeGenerator for LLVMCodeGenerator { + fn generate(&self, ast: AstNode) -> anyhow::Result<()> { + let context = Arc::new(Mutex::new(Context::create())); + + LLVMManager::create(context); + Stack::create(1024); + + Ok(()) + } +} diff --git a/src/codegen/llvm/operations/mod.rs b/src/codegen/llvm/operations/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/codegen/llvm/utils/functions.rs b/src/codegen/llvm/utils/functions.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/codegen/llvm/utils/get_params.rs b/src/codegen/llvm/utils/get_params.rs new file mode 100644 index 0000000..a00cbd9 --- /dev/null +++ b/src/codegen/llvm/utils/get_params.rs @@ -0,0 +1,37 @@ +use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue}; + +pub trait ParamCast { + fn cast(value: BasicValueEnum) -> Option + where + Self: Sized; +} + +impl ParamCast for PointerValue<'_> { + fn cast(value: BasicValueEnum) -> Option { + if let BasicValueEnum::PointerValue(ptr) = value { + Some(ptr) + } else { + None + } + } +} + +impl ParamCast for IntValue<'_> { + fn cast(value: BasicValueEnum) -> Option 
{ + if let BasicValueEnum::IntValue(int) = value { + Some(int) + } else { + None + } + } +} + +pub trait FunctionParams { + fn get_param(&self, nth: u32) -> Option; +} + +impl FunctionParams for FunctionValue<'_> { + fn get_param(&self, nth: u32) -> Option { + self.get_nth_param(nth).and_then(T::cast) + } +} diff --git a/src/codegen/llvm/utils/mod.rs b/src/codegen/llvm/utils/mod.rs new file mode 100644 index 0000000..cfbf39e --- /dev/null +++ b/src/codegen/llvm/utils/mod.rs @@ -0,0 +1,2 @@ +pub mod get_params; +pub mod functions; diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index d5e41e2..15db5ca 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,13 +1,21 @@ +use crate::parser::parse::AstNode; + pub trait CodeGenerator { - fn generate(&self) -> anyhow::Result<()>; + fn generate(&self, ast: AstNode) -> anyhow::Result<()>; +} + +pub enum CodeGeneratorTarget { + LLVM, + Wasm, } +pub mod llvm; pub mod wasm; // Choose the code generator based on the target -pub fn choose_code_generator(target: &str) -> Box { +pub fn code_generator(target: CodeGeneratorTarget) -> Box { match target { - "wasm" => Box::new(wasm::WasmCodeGenerator {}), - _ => panic!("Unknown target: {}", target), + CodeGeneratorTarget::LLVM => Box::::default(), + CodeGeneratorTarget::Wasm => Box::::default(), } } diff --git a/src/codegen/wasm.rs b/src/codegen/wasm.rs index 7ac2ae2..5ece9c3 100644 --- a/src/codegen/wasm.rs +++ b/src/codegen/wasm.rs @@ -1,11 +1,23 @@ -// CodeGenerator +use crate::parser::parse::AstNode; use super::CodeGenerator; pub struct WasmCodeGenerator {} +impl WasmCodeGenerator { + pub fn new() -> Self { + Self {} + } +} + +impl Default for WasmCodeGenerator { + fn default() -> Self { + Self::new() + } +} + impl CodeGenerator for WasmCodeGenerator { - fn generate(&self) -> anyhow::Result<()> { + fn generate(&self, ast: AstNode) -> anyhow::Result<()> { Ok(()) } } diff --git a/src/lib.rs b/src/lib.rs index 56f3c3e..e921b1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ 
-1,7 +1,7 @@ #![feature(const_trait_impl)] -pub mod lexer; +pub mod codegen; pub mod grammar; +pub mod lexer; pub mod parser; pub mod semantic; -pub mod codegen; diff --git a/src/main.rs b/src/main.rs index 6a42412..9298d28 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,29 @@ use miette::Result as MietteResult; -use rusted_pile::{grammar, lexer, parser::SLR::SLR}; +use rusted_pile::{ + codegen::{self, CodeGeneratorTarget}, + grammar, lexer, + parser::SLR::SLR, +}; use std::fs; fn main() -> MietteResult<(), Box> { + // Lexer let lang_contents = fs::read_to_string("assets/lang/test.pile")?; let tokens = lexer::generate::compute_tokens(&lang_contents)?; + // Parser let glc_contents = fs::read_to_string("assets/glc/lang.glc")?; let mut glc = grammar::parser::parse(&glc_contents)?; glc.compute_follow_set().expand(); - let abstract_syntax_tree = SLR::new(glc).parse(tokens, &lang_contents)?; - if let Some(abstract_syntax_tree) = abstract_syntax_tree { - println!("{}", abstract_syntax_tree); - } + let abstract_syntax_tree = SLR::new(glc) + .parse(tokens, &lang_contents)? 
+ .ok_or("Failed to parse")?; + + // Codegen + println!("{}", abstract_syntax_tree); + codegen::code_generator(CodeGeneratorTarget::LLVM).generate(abstract_syntax_tree)?; Ok(()) } From 312874db6450bdcd5280f2c3247b5b08b0b769ae Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Mon, 17 Jul 2023 21:20:33 -0300 Subject: [PATCH 04/10] feat(abort.rs): Refactor `AbortBuiltin` struct and declare `ABORT_FUNCTION_NAME` constant feat(abort.rs): Declare `declare` function in `AbortBuiltin` struct to declare `abort` function feat(abort.rs): Implement `declare` function in `AbortBuiltin` struct to declare `abort` function feat(abort.rs): Implement `call_from_values` function in `AbortBuiltin` struct to call `abort` function with values feat(abort.rs): Implement `get` function in `AbortBuiltin` struct to get `abort` function feat(pop.rs): Create `PopBuiltin` struct feat(pop.rs): Implement `declare` function in `PopBuiltin` struct to declare `pop` function feat(pop.rs): Implement `call` function in `PopBuiltin` struct to call `pop` function feat(pop.rs): Implement `get` function in `PopBuiltin` feat(llvm): add support for arithmetic operations - Add support for arithmetic operations such as plus, minus, times, divide, and modulo. - Implement code generation for each arithmetic operation. - Use LLVM IR instructions to perform the arithmetic operations. - Push the result of the operation onto the stack using the PushBuiltin function. - Pop the operands from the stack using the PopBuiltin function. - The generated code is saved to an LLVM module and printed to a file named "output.ll". - The generated LLVM IR code is compiled using the clang command to produce an executable named "output". 
feat(llvm): add support for generating LLVM IR code for arithmetic operations feat(llvm): add support for generating LLVM IR code for stack operations feat(llvm): add support for generating LLVM IR code for stack push operation feat(llvm): add support for generating LLVM IR code for stack dump operation fix(llvm): fix stack implementation to use Compiler instead of LLVMManager fix(llvm): fix stack implementation to use Compiler's types and functions fix(llvm): fix stack implementation to use Compiler's builder for code generation fix(llvm): fix stack implementation to use Compiler's module for code generation fix(llvm): fix stack implementation to use Compiler's context for code generation fix(llvm): fix stack implementation to use Compiler's load and store functions fix(llvm): fix stack implementation to use Compiler's basic value enum fix(llvm): fix stack implementation to use Compiler's pointer value fix(llvm): fix stack implementation to use Compiler's chore(llvm): refactor LLVMCodeGenerator module structure feat(llvm): add support for generating LLVM IR code from AST fix(llvm): ensure stack is dropped before context feat(llvm): declare external functions and builtins in LLVMCodeGenerator feat(llvm): generate LLVM IR code from AST using Compiler chore(llvm): remove unused modules and files in llvm/utils directory --- src/codegen/llvm/builtins/abort.rs | 77 +++++------- src/codegen/llvm/builtins/mod.rs | 3 +- src/codegen/llvm/builtins/pop.rs | 79 ++++++++++++ src/codegen/llvm/builtins/push.rs | 115 ++++++++++-------- src/codegen/llvm/compiler/mod.rs | 98 +++++++++++++++ src/codegen/llvm/externs/exit.rs | 68 ++++++----- src/codegen/llvm/externs/printf.rs | 60 +++++---- .../llvm/generate_code/arithmetic/divide.rs | 24 ++++ .../llvm/generate_code/arithmetic/minus.rs | 22 ++++ .../llvm/generate_code/arithmetic/mod.rs | 5 + .../llvm/generate_code/arithmetic/modulo.rs | 24 ++++ .../llvm/generate_code/arithmetic/plus.rs | 22 ++++ 
.../llvm/generate_code/arithmetic/times.rs | 21 ++++ src/codegen/llvm/generate_code/mod.rs | 51 ++++++++ src/codegen/llvm/generate_code/stack/dump.rs | 16 +++ src/codegen/llvm/generate_code/stack/mod.rs | 2 + src/codegen/llvm/generate_code/stack/push.rs | 19 +++ src/codegen/llvm/globals/stack.rs | 99 ++++++++------- src/codegen/llvm/manager.rs | 76 ------------ src/codegen/llvm/mod.rs | 50 ++++---- src/codegen/llvm/operations/mod.rs | 0 src/codegen/llvm/utils/functions.rs | 0 src/codegen/llvm/utils/get_params.rs | 37 ------ src/codegen/llvm/utils/mod.rs | 2 - 24 files changed, 639 insertions(+), 331 deletions(-) create mode 100644 src/codegen/llvm/builtins/pop.rs create mode 100644 src/codegen/llvm/compiler/mod.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/divide.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/minus.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/mod.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/modulo.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/plus.rs create mode 100644 src/codegen/llvm/generate_code/arithmetic/times.rs create mode 100644 src/codegen/llvm/generate_code/mod.rs create mode 100644 src/codegen/llvm/generate_code/stack/dump.rs create mode 100644 src/codegen/llvm/generate_code/stack/mod.rs create mode 100644 src/codegen/llvm/generate_code/stack/push.rs delete mode 100644 src/codegen/llvm/manager.rs delete mode 100644 src/codegen/llvm/operations/mod.rs delete mode 100644 src/codegen/llvm/utils/functions.rs delete mode 100644 src/codegen/llvm/utils/get_params.rs delete mode 100644 src/codegen/llvm/utils/mod.rs diff --git a/src/codegen/llvm/builtins/abort.rs b/src/codegen/llvm/builtins/abort.rs index 9d42f33..88e29cf 100644 --- a/src/codegen/llvm/builtins/abort.rs +++ b/src/codegen/llvm/builtins/abort.rs @@ -1,60 +1,46 @@ -use inkwell::{ - types::FunctionType, - values::{FunctionValue, IntValue, PointerValue}, - AddressSpace, -}; +use 
inkwell::{values::FunctionValue, AddressSpace}; use crate::codegen::llvm::{ + compiler::Compiler, externs::{exit::ExitExtern, printf::PrintfExtern}, - manager::LLVMManager, - utils::get_params::FunctionParams, }; -pub struct AbortBuiltinFunction; +pub const ABORT_FUNCTION_NAME: &str = "abort"; + +pub struct AbortBuiltin; -impl AbortBuiltinFunction { - pub fn declare() { - let manager = LLVMManager::get(); - let (context, module, builder) = manager.fetch_all(); +impl AbortBuiltin { + pub fn declare(compiler: &Compiler<'_>) { + let builder = compiler.builder(); + let module = compiler.module(); - let abort_type = context.void_type().fn_type( + let i32_type = compiler.i32_type(); + let abort_type = compiler.void_type().fn_type( &[ - context.i32_type().ptr_type(AddressSpace::default()).into(), // Message - context.i32_type().into(), // Exit code + i32_type.ptr_type(AddressSpace::default()).into(), // Message + i32_type.into(), // Exit code ], false, ); - let abort_func = module.add_function("abort", abort_type, None); + let abort_func = module.add_function(ABORT_FUNCTION_NAME, abort_type, None); - let basic_block = context.append_basic_block(abort_func, "entry"); + let basic_block = compiler.append_basic_block(abort_func, "entry"); { builder.position_at_end(basic_block); - let message_ptr = abort_func.get_param::(0).unwrap(); - let exit_code = abort_func.get_param::(1).unwrap(); + let params = abort_func.get_params(); + let message_ptr = params.get(0).unwrap().into_pointer_value(); + let exit_code = params.get(1).unwrap().into_int_value(); - PrintfExtern::call(&[message_ptr.into()]); - ExitExtern::call_from_int(201); + PrintfExtern::call(compiler, &[message_ptr.into()]); + ExitExtern::call(compiler, &[exit_code.into()]); builder.build_unreachable(); } - - builder.build_return(None); - } - - pub fn get() -> FunctionValue<'static> { - let manager = LLVMManager::get(); - let module = manager.module(); - - module.get_function("abort").unwrap_or_else(|| { - Self::declare(); 
- module - .get_function("abort") - .expect("abort function not found") - }) } pub fn call_from_values( + compiler: &Compiler<'_>, message: &str, code: i32, string_name: Option, @@ -63,25 +49,28 @@ impl AbortBuiltinFunction { let string_name = string_name.unwrap_or_else(|| "message".to_string()); let call_name = call_name.unwrap_or_else(|| "abort_call".to_string()); - let manager = LLVMManager::get(); - let builder = manager.builder(); + let builder = compiler.builder(); let message_ptr = builder.build_global_string_ptr(message, &string_name); + builder.build_call( - Self::get(), + Self::get(compiler), &[ message_ptr.as_pointer_value().into(), - manager.const_int(code as u64).into(), + compiler.const_i32(code).into(), ], &call_name, ); } - pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { - let manager = LLVMManager::get(); + pub fn get<'ctx>(compiler: &Compiler<'ctx>) -> FunctionValue<'ctx> { + let module = compiler.module(); - manager - .builder() - .build_call(Self::get(), args, "abort_call"); + module.get_function(ABORT_FUNCTION_NAME).unwrap_or_else(|| { + Self::declare(compiler); + module + .get_function(ABORT_FUNCTION_NAME) + .expect("abort function not found") + }) } } diff --git a/src/codegen/llvm/builtins/mod.rs b/src/codegen/llvm/builtins/mod.rs index 113ebed..fed0d6c 100644 --- a/src/codegen/llvm/builtins/mod.rs +++ b/src/codegen/llvm/builtins/mod.rs @@ -1,2 +1,3 @@ -pub mod push; pub mod abort; +pub mod push; +pub mod pop; diff --git a/src/codegen/llvm/builtins/pop.rs b/src/codegen/llvm/builtins/pop.rs new file mode 100644 index 0000000..4430833 --- /dev/null +++ b/src/codegen/llvm/builtins/pop.rs @@ -0,0 +1,79 @@ +use inkwell::values::{FunctionValue, IntValue, PointerValue}; + +use crate::codegen::llvm::{compiler::Compiler, globals::stack::Stack}; + +use super::abort::AbortBuiltin; + +pub const POP_BUILTIN_FUNCTION_NAME: &str = "pop"; + +pub struct PopBuiltin; + +impl PopBuiltin { + pub fn declare<'ctx>(compiler: &Compiler<'ctx>, stack: 
&'ctx Stack<'ctx>) { + let builder = compiler.builder(); + let module = compiler.module(); + + let i32_type = compiler.i32_type(); + let remove_type = i32_type.fn_type(&[], false); + let remove_func = module.add_function(POP_BUILTIN_FUNCTION_NAME, remove_type, None); // same symbol that `get` looks up + let entry = compiler.append_basic_block(remove_func, "entry"); + + builder.position_at_end(entry); + + // 1. Check if the stack is empty + let stack_empty_block = compiler.append_basic_block(remove_func, "stack_empty"); + let stack_not_empty_block = compiler.append_basic_block(remove_func, "stack_not_empty"); + builder.build_conditional_branch( + stack.is_empty(compiler), + stack_empty_block, + stack_not_empty_block, + ); + + // 2. If the stack is empty + builder.position_at_end(stack_empty_block); + AbortBuiltin::call_from_values( + compiler, + "[ABORT @ pop]: stack is already empty\n", + 1, + Some("error_message_stack_empty".to_string()), + Some("error_stack_empty".to_string()), + ); + builder.build_unreachable(); + + // 3. If not then remove the value + builder.position_at_end(stack_not_empty_block); + let top_ptr = stack.load_top_ptr(compiler); + let prev_ptr = unsafe { + builder.build_in_bounds_gep( + i32_type, + top_ptr, + &[i32_type.const_int(u64::MAX, true)], + "prevPtr", + ) + }; + builder.build_store::<PointerValue>(stack.stack_top_ptr(), prev_ptr); + + // Load and return the top element + let top_element = builder.build_load(i32_type, prev_ptr, "topElement"); + builder.build_return(Some(&top_element)); + } + + pub fn call<'ctx>(compiler: &Compiler<'ctx>) -> IntValue<'ctx> { + let builder = compiler.builder(); + + let top_element = builder.build_call(Self::get(compiler), &[], "topElement"); + + top_element + .try_as_basic_value() + .left() + .unwrap() + .into_int_value() + } + + pub fn get<'ctx>(compiler: &Compiler<'ctx>) -> FunctionValue<'ctx> { + let module = compiler.module(); + module + .get_function(POP_BUILTIN_FUNCTION_NAME) + .expect("pop function not found") + } +} diff --git a/src/codegen/llvm/builtins/push.rs 
b/src/codegen/llvm/builtins/push.rs index bc4ae9a..d6c17e0 100644 --- a/src/codegen/llvm/builtins/push.rs +++ b/src/codegen/llvm/builtins/push.rs @@ -1,59 +1,78 @@ -use inkwell::values::IntValue; - -use crate::codegen::llvm::{ - globals::stack::Stack, manager::LLVMManager, utils::get_params::FunctionParams, -}; - -use super::abort::AbortBuiltinFunction; - -pub fn generate_push_function() { - let manager = LLVMManager::get(); - let (context, module, builder) = manager.fetch_all(); - - let i32_type = context.i32_type(); - let insert_type = context.void_type().fn_type(&[i32_type.into()], false); - let insert_func = module.add_function("push", insert_type, None); - let entry = context.append_basic_block(insert_func, "entry"); - - { - builder.position_at_end(entry); - - // Prelude. Get the value from the first parameter - let value = insert_func - .get_param::(0) - .expect("Expected a value in the first parameter of the push function"); - let stack_manager = Stack::get(); - - // 1. Check if the stack is full - let stack_full_block = context.append_basic_block(insert_func, "stack_full"); - let stack_not_full_block = context.append_basic_block(insert_func, "stack_not_full"); - builder.build_conditional_branch( - stack_manager.is_full(), - stack_full_block, - stack_not_full_block, - ); +use crate::codegen::llvm::{compiler::Compiler, globals::stack::Stack}; + +use super::abort::AbortBuiltin; + +pub struct PushBuiltin; + +// TODO: Copy the arch-llvm push, it works fine +impl PushBuiltin { + pub fn declare<'ctx>(compiler: &Compiler<'ctx>, stack: &'ctx Stack<'ctx>) { + let builder = compiler.builder(); + let module = compiler.module(); + + let i32_type = compiler.i32_type(); + let insert_type = compiler.void_type().fn_type(&[i32_type.into()], false); + let insert_func = module.add_function("push", insert_type, None); + let entry = compiler.append_basic_block(insert_func, "entry"); - // 2. 
If the stack is full { - builder.position_at_end(stack_full_block); + builder.position_at_end(entry); - AbortBuiltinFunction::call_from_values( - "[ABORT @ push]: stack is already full", - 1, - Some("error_message_stack_full".to_string()), - None, + // 1. Check if the stack is full + let stack_full_block = compiler.append_basic_block(insert_func, "stack_full"); + let stack_not_full_block = compiler.append_basic_block(insert_func, "stack_not_full"); + builder.build_conditional_branch( + stack.is_full(compiler), + stack_full_block, + stack_not_full_block, ); - builder.build_unreachable(); - } + // 2. If the stack is full + { + builder.position_at_end(stack_full_block); - // 3. If not then store the value - { - builder.position_at_end(stack_not_full_block); + AbortBuiltin::call_from_values( + compiler, + "[ABORT @ push]: stack is already full\n", + 1, + Some("error_message_stack_full".to_string()), + Some("error_stack_full".to_string()), + ); + + builder.build_unreachable(); + } + + // 3. If not then store the value + { + // Get the value from the first parameter + let value = insert_func.get_first_param().unwrap().into_int_value(); - stack_manager.store(value); + builder.position_at_end(stack_not_full_block); + + stack.store(compiler, value); + } + + builder.build_return(None); } + } + + pub fn call_from_int(compiler: &Compiler<'_>, value: i32) { + let builder = compiler.builder(); + + builder.build_call( + Self::get(compiler), + &[compiler.i32_type().const_int(value as u64, false).into()], + "push_int_call", + ); + } + + pub fn call(compiler: &Compiler<'_>, args: &[inkwell::values::BasicMetadataValueEnum]) { + compiler + .builder() + .build_call(Self::get(compiler), args, "push_call"); + } - builder.build_return(None); + pub fn get<'ctx>(compiler: &Compiler<'ctx>) -> inkwell::values::FunctionValue<'ctx> { + compiler.module().get_function("push").unwrap() } } diff --git a/src/codegen/llvm/compiler/mod.rs b/src/codegen/llvm/compiler/mod.rs new file mode 100644 index 
0000000..6744e66 --- /dev/null +++ b/src/codegen/llvm/compiler/mod.rs @@ -0,0 +1,98 @@ +use std::path::Path; + +use inkwell::{builder::Builder, context::Context, module::Module}; + +// use super::wrapper::context::Context; + +pub struct Compiler<'ctx> { + context: &'ctx Context, + module: Module<'ctx>, + builder: Builder<'ctx>, +} + +impl<'ctx> Compiler<'ctx> { + pub fn new(context: &'ctx Context, name: &str) -> Self { + let module = context.create_module(name); + let builder = context.create_builder(); + + Self { + context, + module, + builder, + } + } + + pub fn module(&self) -> &Module<'ctx> { + &self.module + } + + pub fn builder(&self) -> &Builder<'ctx> { + &self.builder + } + + pub fn append_basic_block( + &self, + function: inkwell::values::FunctionValue<'ctx>, + name: &str, + ) -> inkwell::basic_block::BasicBlock<'ctx> { + self.context.append_basic_block(function, name) + } + + // ====================== Types ====================== + + pub fn array_type(&self, size: u32) -> inkwell::types::ArrayType<'ctx> { + self.context.i32_type().array_type(size) + } + + pub fn i32_type(&self) -> inkwell::types::IntType<'ctx> { + self.context.i32_type() + } + + pub fn ptr_i32_type(&self) -> inkwell::types::PointerType<'ctx> { + self + .context + .i32_type() + .ptr_type(inkwell::AddressSpace::default()) + } + + pub fn void_type(&self) -> inkwell::types::VoidType<'ctx> { + self.context.void_type() + } + + pub fn const_i32(&self, value: i32) -> inkwell::values::IntValue<'ctx> { + self.i32_type().const_int(value as u64, false) + } + + pub fn const_u32(&self, value: u32) -> inkwell::values::IntValue<'ctx> { + self.i32_type().const_int(value as u64, false) + } + + pub fn fn_type( + &self, + param_types: &[inkwell::types::BasicMetadataTypeEnum<'ctx>], + is_var_args: bool, + ) -> inkwell::types::FunctionType<'ctx> { + self.context.void_type().fn_type(param_types, is_var_args) + } +} + +impl<'ctx> Drop for Compiler<'ctx> { + fn drop(&mut self) { + let module = &self.module; + + 
// let binding = module.print_to_string(); + // let output = binding.to_str().unwrap(); + // println!("{}", output); + + module.print_to_file(Path::new("output.ll")).unwrap(); + + // Invoke clang -o output output.ll + std::process::Command::new("clang") + .arg("-o") + .arg("output") + .arg("output.ll") + .arg("-lc") + .output() + .expect("failed to execute process"); + } +} diff --git a/src/codegen/llvm/externs/exit.rs b/src/codegen/llvm/externs/exit.rs index 6b74305..aa12e5f 100644 --- a/src/codegen/llvm/externs/exit.rs +++ b/src/codegen/llvm/externs/exit.rs @@ -1,47 +1,49 @@ -use crate::codegen::llvm::manager::LLVMManager; +use inkwell::values::FunctionValue; -pub struct ExitExtern; +use crate::codegen::llvm::compiler::Compiler; -pub const EXTERN_EXIT: &str = "exit"; +pub const EXIT_FUNCTION_NAME: &str = "exit"; -impl ExitExtern { - fn declare() { - let manager = LLVMManager::get(); - let module = manager.module(); +pub struct ExitExtern; - let exit_type = manager - .void_type() - .fn_type(&[manager.i32_type().into()], false); - module.add_function(EXTERN_EXIT, exit_type, None); +impl ExitExtern { + pub fn declare(compiler: &Compiler<'_>) { + compiler.module().add_function( + EXIT_FUNCTION_NAME, + compiler + .void_type() + .fn_type(&[compiler.i32_type().into()], false), + None, + ); } - pub fn get() -> inkwell::values::FunctionValue<'static> { - let manager = LLVMManager::get(); - let module = manager.module(); - - module.get_function(EXTERN_EXIT).unwrap_or_else(|| { - ExitExtern::declare(); - module - .get_function(EXTERN_EXIT) - .expect("printf function not found") - }) + pub fn get<'ctx>(compiler: &Compiler<'ctx>) -> FunctionValue<'ctx> { + compiler + .module() + .get_function(EXIT_FUNCTION_NAME) + .unwrap_or_else(|| { + Self::declare(compiler); + compiler + .module() + .get_function(EXIT_FUNCTION_NAME) + .expect("exit function not found") + }) } - pub fn call_from_int(code: i32) { - let manager = LLVMManager::get(); - - manager.builder().build_call( - 
ExitExtern::get(), - &[manager.const_int(code as u64).into()], - "exit_call", + pub fn call_from_int(compiler: &Compiler<'_>, code: i32) { + compiler.builder().build_call( + Self::get(compiler), + &[compiler.const_i32(code).into()], + "exit_call_from_int", ); } - pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { - let manager = LLVMManager::get(); - - manager + pub fn call<'ctx>( + compiler: &Compiler<'ctx>, + args: &[inkwell::values::BasicMetadataValueEnum<'ctx>], + ) { + compiler .builder() - .build_call(ExitExtern::get(), args, "exit_call"); + .build_call(Self::get(compiler), args, "exit_call"); } } diff --git a/src/codegen/llvm/externs/printf.rs b/src/codegen/llvm/externs/printf.rs index 47dd047..33e7d90 100644 --- a/src/codegen/llvm/externs/printf.rs +++ b/src/codegen/llvm/externs/printf.rs @@ -1,37 +1,49 @@ -use crate::codegen::llvm::manager::LLVMManager; +use inkwell::values::FunctionValue; + +use crate::codegen::llvm::compiler::Compiler; + +pub const PRINTF_FUNCTION_NAME: &str = "printf"; pub struct PrintfExtern; impl PrintfExtern { - pub fn declare() { - let manager = LLVMManager::get(); - let module = manager.module(); - - let printf_type = manager - .i32_type() - .fn_type(&[manager.ptr_i32_type().into()], true); - module.add_function("printf", printf_type, None); + pub fn declare(compiler: &Compiler<'_>) { + compiler.module().add_function( + PRINTF_FUNCTION_NAME, + compiler + .i32_type() + .fn_type(&[compiler.ptr_i32_type().into()], true), + None, + ); } - pub fn get() -> inkwell::values::FunctionValue<'static> { - let manager = LLVMManager::get(); - let module = manager.module(); - - module.get_function("printf").unwrap_or_else(|| { - PrintfExtern::declare(); - module - .get_function("printf") - .expect("printf function not found") - }) + pub fn get<'ctx>(compiler: &Compiler<'ctx>) -> FunctionValue<'ctx> { + let module = compiler.module(); + + module + .get_function(PRINTF_FUNCTION_NAME) + .unwrap_or_else(|| { + Self::declare(compiler); + 
module + .get_function(PRINTF_FUNCTION_NAME) + .expect("abort function not found") + }) } - pub fn call_from_str(message: &str) {} + pub fn call_from_str(compiler: &Compiler<'_>, message: &str) { + let builder = compiler.builder(); - pub fn call(args: &[inkwell::values::BasicMetadataValueEnum]) { - let manager = LLVMManager::get(); + let message_ptr = builder.build_global_string_ptr(message, "message"); + builder.build_call( + PrintfExtern::get(compiler), + &[message_ptr.as_pointer_value().into()], + "printf_call_from_str", + ); + } - manager + pub fn call(compiler: &Compiler<'_>, args: &[inkwell::values::BasicMetadataValueEnum]) { + compiler .builder() - .build_call(PrintfExtern::get(), args, "printf_call"); + .build_call(PrintfExtern::get(compiler), args, "printf_call"); } } diff --git a/src/codegen/llvm/generate_code/arithmetic/divide.rs b/src/codegen/llvm/generate_code/arithmetic/divide.rs new file mode 100644 index 0000000..26cd025 --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/divide.rs @@ -0,0 +1,24 @@ +use crate::{ + codegen::llvm::{ + builtins::{pop::PopBuiltin, push::PushBuiltin}, + compiler::Compiler, + generate_code::GenerateLLVMIR, + }, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + GenerateLLVMIR::generate(compiler, &ast.children[0])?; + GenerateLLVMIR::generate(compiler, &ast.children[1])?; + + let right = PopBuiltin::call(compiler); + let left = PopBuiltin::call(compiler); + + let result = compiler + .builder() + .build_int_unsigned_div(left, right, "divtmp"); + + PushBuiltin::call(compiler, &[result.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/arithmetic/minus.rs b/src/codegen/llvm/generate_code/arithmetic/minus.rs new file mode 100644 index 0000000..f9344b8 --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/minus.rs @@ -0,0 +1,22 @@ +use crate::{ + codegen::llvm::{ + builtins::{pop::PopBuiltin, push::PushBuiltin}, + 
compiler::Compiler, + generate_code::GenerateLLVMIR, + }, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + GenerateLLVMIR::generate(compiler, &ast.children[0])?; + GenerateLLVMIR::generate(compiler, &ast.children[1])?; + + let right = PopBuiltin::call(compiler); + let left = PopBuiltin::call(compiler); + + let result = compiler.builder().build_int_sub(left, right, "subtmp"); + + PushBuiltin::call(compiler, &[result.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/arithmetic/mod.rs b/src/codegen/llvm/generate_code/arithmetic/mod.rs new file mode 100644 index 0000000..75690c2 --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/mod.rs @@ -0,0 +1,5 @@ +pub mod plus; +pub mod times; +pub mod minus; +pub mod divide; +pub mod modulo; diff --git a/src/codegen/llvm/generate_code/arithmetic/modulo.rs b/src/codegen/llvm/generate_code/arithmetic/modulo.rs new file mode 100644 index 0000000..7dc6568 --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/modulo.rs @@ -0,0 +1,24 @@ +use crate::{ + codegen::llvm::{ + builtins::{pop::PopBuiltin, push::PushBuiltin}, + compiler::Compiler, + generate_code::GenerateLLVMIR, + }, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + GenerateLLVMIR::generate(compiler, &ast.children[0])?; + GenerateLLVMIR::generate(compiler, &ast.children[1])?; + + let right = PopBuiltin::call(compiler); + let left = PopBuiltin::call(compiler); + + let result = compiler + .builder() + .build_int_unsigned_rem(left, right, "modtmp"); + + PushBuiltin::call(compiler, &[result.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/arithmetic/plus.rs b/src/codegen/llvm/generate_code/arithmetic/plus.rs new file mode 100644 index 0000000..5bddc6d --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/plus.rs @@ -0,0 +1,22 @@ +use crate::{ + codegen::llvm::{ + 
builtins::{pop::PopBuiltin, push::PushBuiltin}, + compiler::Compiler, + generate_code::GenerateLLVMIR, + }, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + GenerateLLVMIR::generate(compiler, &ast.children[0])?; + GenerateLLVMIR::generate(compiler, &ast.children[1])?; + + let right = PopBuiltin::call(compiler); + let left = PopBuiltin::call(compiler); + + let result = compiler.builder().build_int_add(left, right, "addtmp"); + + PushBuiltin::call(compiler, &[result.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/arithmetic/times.rs b/src/codegen/llvm/generate_code/arithmetic/times.rs new file mode 100644 index 0000000..98682a3 --- /dev/null +++ b/src/codegen/llvm/generate_code/arithmetic/times.rs @@ -0,0 +1,21 @@ +use crate::{ + codegen::llvm::{ + builtins::{pop::PopBuiltin, push::PushBuiltin}, + compiler::Compiler, generate_code::GenerateLLVMIR, + }, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + GenerateLLVMIR::generate(compiler, &ast.children[0])?; + GenerateLLVMIR::generate(compiler, &ast.children[1])?; + + let right = PopBuiltin::call(compiler); + let left = PopBuiltin::call(compiler); + + let result = compiler.builder().build_int_mul(left, right, "multmp"); + + PushBuiltin::call(compiler, &[result.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/mod.rs b/src/codegen/llvm/generate_code/mod.rs new file mode 100644 index 0000000..37cbdfa --- /dev/null +++ b/src/codegen/llvm/generate_code/mod.rs @@ -0,0 +1,51 @@ +// Iter over the AST and call the needed functions to generate LLVM IR + +use crate::{ + lexer::tokens::{ArithmeticOperators, StackOperators, Token}, + parser::parse::AstNode, +}; + +use super::{builtins::pop::PopBuiltin, compiler::Compiler}; + +pub mod arithmetic; +pub mod stack; + +pub struct GenerateLLVMIR; + +impl GenerateLLVMIR { + pub fn generate(compiler: &Compiler<'_>, ast: 
&AstNode) -> anyhow::Result<()> { + match ast.token { + Token::Program => { + let module = compiler.module(); + let builder = compiler.builder(); + + let main_type = compiler.i32_type().fn_type(&[], false); + let main_func = module.add_function("main", main_type, None); + let entry = compiler.append_basic_block(main_func, "entry"); + builder.position_at_end(entry); + + for child in ast.children.iter() { + GenerateLLVMIR::generate(compiler, child)?; + } + + let top_element = PopBuiltin::call(compiler); + builder.build_return(Some(&top_element)); + } + Token::ArithmeticOp(operator) => match operator { + ArithmeticOperators::Plus => arithmetic::plus::generate(compiler, ast)?, + ArithmeticOperators::Times => arithmetic::times::generate(compiler, ast)?, + ArithmeticOperators::Minus => arithmetic::minus::generate(compiler, ast)?, + ArithmeticOperators::Divide => arithmetic::divide::generate(compiler, ast)?, + ArithmeticOperators::Modulo => arithmetic::modulo::generate(compiler, ast)?, + }, + Token::Integer(..) 
=> stack::push::generate(compiler, ast)?, + Token::StackOps(operator) => match operator { + StackOperators::Dump => stack::dump::generate(compiler, ast)?, + _ => todo!(), + }, + _ => todo!(), + } + + Ok(()) + } +} diff --git a/src/codegen/llvm/generate_code/stack/dump.rs b/src/codegen/llvm/generate_code/stack/dump.rs new file mode 100644 index 0000000..3bfe468 --- /dev/null +++ b/src/codegen/llvm/generate_code/stack/dump.rs @@ -0,0 +1,16 @@ +use crate::codegen::llvm::{builtins::pop::PopBuiltin, externs::printf::PrintfExtern}; + +pub fn generate( + compiler: &crate::codegen::llvm::compiler::Compiler<'_>, + _ast: &crate::parser::parse::AstNode, +) -> anyhow::Result<()> { + let value = PopBuiltin::call(compiler); + let message_ptr = compiler + .builder() + .build_global_string_ptr("%d\n", "dump_message") + .as_pointer_value(); + + PrintfExtern::call(compiler, &[message_ptr.into(), value.into()]); + + Ok(()) +} diff --git a/src/codegen/llvm/generate_code/stack/mod.rs b/src/codegen/llvm/generate_code/stack/mod.rs new file mode 100644 index 0000000..a7cdf88 --- /dev/null +++ b/src/codegen/llvm/generate_code/stack/mod.rs @@ -0,0 +1,2 @@ +pub mod dump; +pub mod push; diff --git a/src/codegen/llvm/generate_code/stack/push.rs b/src/codegen/llvm/generate_code/stack/push.rs new file mode 100644 index 0000000..6ad41e6 --- /dev/null +++ b/src/codegen/llvm/generate_code/stack/push.rs @@ -0,0 +1,19 @@ +use crate::{ + codegen::llvm::{builtins::push::PushBuiltin, compiler::Compiler}, + lexer::tokens::Token, + parser::parse::AstNode, +}; + +pub fn generate(compiler: &Compiler<'_>, ast: &AstNode) -> anyhow::Result<()> { + match ast.token { + Token::Integer(value) => { + PushBuiltin::call_from_int(compiler, value); + Ok(()) + } + _ => Err(anyhow::anyhow!("Invalid token")), + } +} + +pub fn generate_from_int(compiler: &Compiler<'_>, value: i32) { + PushBuiltin::call_from_int(compiler, value); +} diff --git a/src/codegen/llvm/globals/stack.rs b/src/codegen/llvm/globals/stack.rs index 
9e7d43a..d8bbb84 100644 --- a/src/codegen/llvm/globals/stack.rs +++ b/src/codegen/llvm/globals/stack.rs @@ -1,8 +1,8 @@ use inkwell::values::{BasicValueEnum, GlobalValue, IntValue, PointerValue}; -use singleton_manager::sm; -use crate::codegen::llvm::manager::LLVMManager; +use crate::codegen::llvm::compiler::Compiler; +#[derive(Clone, Copy)] pub struct Stack<'ctx> { pub stack: GlobalValue<'ctx>, pub top: GlobalValue<'ctx>, @@ -12,18 +12,17 @@ pub struct Stack<'ctx> { pub const STACK_NAME: &str = "gStack"; pub const STACK_TOP_PTR_NAME: &str = "gTopPtr"; -impl<'ctx> Stack<'static> { - pub fn new(size: u32) -> Self { - let manager = LLVMManager::get(); - let module = manager.module(); +impl<'ctx> Stack<'ctx> { + pub fn new(size: u32, compiler: &Compiler<'ctx>) -> Self { + let module = compiler.module(); - let array_type = manager.array_type(size); + let array_type = compiler.array_type(size); let g_array = module.add_global(array_type, None, STACK_NAME); g_array.set_linkage(inkwell::module::Linkage::Internal); g_array.set_initializer(&array_type.const_zero()); - let g_top = module.add_global(manager.ptr_i32_type(), None, STACK_TOP_PTR_NAME); + let g_top = module.add_global(compiler.ptr_i32_type(), None, STACK_TOP_PTR_NAME); g_top.set_linkage(inkwell::module::Linkage::Internal); g_top.set_initializer(&g_array.as_pointer_value()); @@ -34,52 +33,56 @@ impl<'ctx> Stack<'static> { } } - fn load_top(&self) -> BasicValueEnum { - let manager = LLVMManager::get(); - - manager.builder().build_load( - manager.ptr_i32_type(), - self.top.as_pointer_value(), - STACK_TOP_PTR_NAME, - ) - } - - fn stack_top_ptr(&self) -> PointerValue { + pub fn stack_top_ptr(&self) -> PointerValue { self.top.as_pointer_value() } - fn stack_ptr(&self) -> PointerValue { + pub fn stack_ptr(&self) -> PointerValue { self.stack.as_pointer_value() } - pub fn is_full(&self) -> IntValue { - let manager = LLVMManager::get(); - let builder = manager.builder(); + pub fn is_full(&'ctx self, compiler: 
&Compiler<'ctx>) -> IntValue<'ctx> { + let builder = compiler.builder(); + + let top = self.load_top_ptr(compiler); let end_of_stack_ptr = unsafe { builder.build_in_bounds_gep( - manager.i32_type(), + compiler.i32_type(), self.stack_ptr(), - &[manager.const_int(self.size as u64)], + &[compiler.const_u32(self.size)], "nextTopPtr", ) }; builder.build_int_compare( inkwell::IntPredicate::EQ, - self.stack_top_ptr(), + top, end_of_stack_ptr, - "isFull", + "isStackFull", + ) + } + + pub fn is_empty(&'ctx self, compiler: &Compiler<'ctx>) -> IntValue<'ctx> { + let builder = compiler.builder(); + + let top = self.load_top_ptr(compiler); + let start_of_stack_ptr = self.stack_ptr(); + + builder.build_int_compare( + inkwell::IntPredicate::EQ, + top, + start_of_stack_ptr, + "isStackEmpty", ) } // Store in the stack - pub fn store(&self, value: IntValue) { - let manager = LLVMManager::get(); - let builder = manager.builder(); + pub fn store(&'ctx self, compiler: &Compiler<'ctx>, value: IntValue) { + let builder = compiler.builder(); // Load gTop into the gTop variable - let ptr = self.stack_top_ptr(); + let ptr = self.load_top_ptr(compiler); // Store the element at the position where the top is pointing to builder.build_store(ptr, value); @@ -87,25 +90,37 @@ impl<'ctx> Stack<'static> { // Increment the top let next_ptr = unsafe { builder.build_in_bounds_gep( - manager.i32_type(), + compiler.i32_type(), ptr, - &[manager.i32_type().const_int(1, false)], + &[compiler.i32_type().const_int(1, false)], "nextTopPtr", ) }; - builder.build_store(ptr, next_ptr); + builder.build_store(self.stack_top_ptr(), next_ptr); + } + + pub fn load_top(&'ctx self, compiler: &Compiler<'ctx>) -> BasicValueEnum<'ctx> { + compiler.builder().build_load( + compiler.ptr_i32_type(), + self.stack_top_ptr(), + STACK_TOP_PTR_NAME, + ) + } + + pub fn load_top_ptr(&'ctx self, compiler: &Compiler<'ctx>) -> PointerValue<'ctx> { + self.load_top(compiler).into_pointer_value() } - pub fn get() -> &'static mut Self { - 
sm() - .get::("Stack") - .expect("Failed to get Stack. Probably not created yet.") + pub fn load_size(&'ctx self, compiler: &Compiler<'ctx>) -> BasicValueEnum<'ctx> { + compiler.builder().build_load( + compiler.ptr_i32_type(), + self.stack_top_ptr(), + STACK_TOP_PTR_NAME, + ) } - pub fn create(size: u32) { - sm() - .set("Stack", Self::new(size)) - .expect("Failed to create Stack"); + pub fn load_size_ptr(&'ctx self, compiler: &Compiler<'ctx>) -> PointerValue<'ctx> { + self.load_size(compiler).into_pointer_value() } } diff --git a/src/codegen/llvm/manager.rs b/src/codegen/llvm/manager.rs deleted file mode 100644 index 60ee320..0000000 --- a/src/codegen/llvm/manager.rs +++ /dev/null @@ -1,76 +0,0 @@ -use inkwell::{builder::Builder, context::Context, module::Module}; -use singleton_manager::sm; -use std::sync::{Arc, Mutex}; - -pub struct LLVMManager<'ctx> { - pub guard: Mutex<()>, - pub context: Arc>, - pub module: Module<'ctx>, - pub builder: Builder<'ctx>, -} - -impl<'ctx> LLVMManager<'ctx> { - pub fn new(context: Arc>) -> Self { - let context_clone = Arc::clone(&context); - let module = context_clone.lock().unwrap().create_module("main"); - - let context_clone = Arc::clone(&context); - let builder = context_clone.lock().unwrap().create_builder(); - - Self { - guard: Mutex::new(()), - context, - module, - builder, - } - } - - pub fn context(&self) -> &Context { - &self.context.lock().unwrap() - } - - pub fn module(&self) -> &Module<'ctx> { - &self.module - } - - pub fn builder(&self) -> &Builder<'ctx> { - &self.builder - } - - pub fn fetch_all(&self) -> (&'ctx Context, &Module<'ctx>, &Builder<'ctx>) { - (&self.context(), &self.module, &self.builder) - } - - // Helper functions for types and stuff - pub fn i32_type(&self) -> inkwell::types::IntType<'ctx> { - self.context().i32_type() - } - - pub fn ptr_i32_type(&self) -> inkwell::types::PointerType<'ctx> { - self.i32_type().ptr_type(inkwell::AddressSpace::default()) - } - - pub fn void_type(&self) -> 
inkwell::types::VoidType<'ctx> { - self.context().void_type() - } - - pub fn array_type(&self, size: u32) -> inkwell::types::ArrayType<'ctx> { - self.i32_type().array_type(size) - } - - pub fn const_int(&self, value: u64) -> inkwell::values::IntValue<'ctx> { - self.i32_type().const_int(value, false) - } - - pub fn get() -> &'static mut Self { - sm() - .get::("LLVMManager") - .expect("Failed to get LLVMManager. Probably not created yet.") - } - - pub fn create(context: Arc>) { - sm() - .set("LLVMManager", LLVMManager::new(context)) - .expect("Failed to create LLVMManager"); - } -} diff --git a/src/codegen/llvm/mod.rs b/src/codegen/llvm/mod.rs index a49cb0e..5f8ae01 100644 --- a/src/codegen/llvm/mod.rs +++ b/src/codegen/llvm/mod.rs @@ -1,40 +1,42 @@ -use std::{rc::Rc, sync::{Arc, Mutex}}; - use inkwell::context::Context; use crate::parser::parse::AstNode; -use self::{globals::stack::Stack, manager::LLVMManager}; +use self::{ + builtins::{abort::AbortBuiltin, pop::PopBuiltin, push::PushBuiltin}, + compiler::Compiler, + externs::{exit::ExitExtern, printf::PrintfExtern}, + globals::stack::Stack, +}; use super::CodeGenerator; pub mod builtins; +pub mod compiler; pub mod externs; +pub mod generate_code; pub mod globals; -pub mod manager; -pub mod operations; -pub mod utils; - -pub struct LLVMCodeGenerator {} - -impl LLVMCodeGenerator { - pub fn new() -> Self { - Self {} - } -} -impl Default for LLVMCodeGenerator { - fn default() -> Self { - Self::new() - } -} +#[derive(Default)] +pub struct LLVMCodeGenerator; impl CodeGenerator for LLVMCodeGenerator { - fn generate(&self, ast: AstNode) -> anyhow::Result<()> { - let context = Arc::new(Mutex::new(Context::create())); - - LLVMManager::create(context); - Stack::create(1024); + fn generate(&mut self, ast: AstNode) -> anyhow::Result<()> { + // This trick is to ensure that stack is dropped before context + let stack; + { + let context = Context::create(); + let compiler = Compiler::new(&context, "main"); + stack = Stack::new(64 
* 1024, &compiler); + + PrintfExtern::declare(&compiler); + ExitExtern::declare(&compiler); + AbortBuiltin::declare(&compiler); + PushBuiltin::declare(&compiler, &stack); + PopBuiltin::declare(&compiler, &stack); + + generate_code::GenerateLLVMIR::generate(&compiler, &ast)?; + } Ok(()) } diff --git a/src/codegen/llvm/operations/mod.rs b/src/codegen/llvm/operations/mod.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/codegen/llvm/utils/functions.rs b/src/codegen/llvm/utils/functions.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/codegen/llvm/utils/get_params.rs b/src/codegen/llvm/utils/get_params.rs deleted file mode 100644 index a00cbd9..0000000 --- a/src/codegen/llvm/utils/get_params.rs +++ /dev/null @@ -1,37 +0,0 @@ -use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue}; - -pub trait ParamCast { - fn cast(value: BasicValueEnum) -> Option - where - Self: Sized; -} - -impl ParamCast for PointerValue<'_> { - fn cast(value: BasicValueEnum) -> Option { - if let BasicValueEnum::PointerValue(ptr) = value { - Some(ptr) - } else { - None - } - } -} - -impl ParamCast for IntValue<'_> { - fn cast(value: BasicValueEnum) -> Option { - if let BasicValueEnum::IntValue(int) = value { - Some(int) - } else { - None - } - } -} - -pub trait FunctionParams { - fn get_param(&self, nth: u32) -> Option; -} - -impl FunctionParams for FunctionValue<'_> { - fn get_param(&self, nth: u32) -> Option { - self.get_nth_param(nth).and_then(T::cast) - } -} diff --git a/src/codegen/llvm/utils/mod.rs b/src/codegen/llvm/utils/mod.rs deleted file mode 100644 index cfbf39e..0000000 --- a/src/codegen/llvm/utils/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod get_params; -pub mod functions; From 37cb1540f7aff11e054889d3ce96a10fbed4658e Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Mon, 17 Jul 2023 21:21:20 -0300 Subject: [PATCH 05/10] feat(codegen): add support for virtual machine code generation feat(codegen/vm): implement virtual machine 
code generation feat(codegen/wasm): implement WebAssembly code generation feat(interpreter/vm): implement virtual machine interpreter feat(interpreter/vm/arithmetic): implement arithmetic instructions for the virtual machine feat(interpreter/vm/comparison): implement comparison instructions for the virtual machine feat(interpreter/vm/stack): implement stack manipulation instructions for the virtual machine feat(interpreter/vm/value): implement value types for the virtual machine feat(interpreter): add support for running virtual machine bytecode feat: add support for stack operations in the virtual machine feat: add support for dump instruction in the virtual machine feat: add support for dup instruction in the virtual machine feat: add support for pop instruction in the virtual machine feat: add support for push instruction in the virtual machine feat: add support for value types in the virtual machine feat: add support for integer literals in the lexer feat: add support for float literals in the lexer feat: add support for string literals in the lexer feat: add support for arithmetic operators in the lexer feat: add support for comparison operators in the lexer feat: add support for stack operators in the lexer feat: add support for def type in the lexer feat: add support for identifiers in the lexer feat: add support for end of input in the lexer feat: add support for comments in the lexer feat: add support for interpreter module feat: add support for virtual machine chore(parse.rs): remove println statement feat(parse.rs): add token field to AstNode struct feat(parse.rs): add AstNodeIter struct and implement Iterator for AstNodeIter feat(parse.rs): add AstNodeDetailedIter struct and implement Iterator for AstNodeDetailedIter feat(parse.rs): implement Display trait for AstNode feat(parse.rs): modify parse_ast function to populate token field in AstNode struct --- src/codegen/mod.rs | 5 +- src/codegen/vm/mod.rs | 161 +++++++++++++++++++++++++++ 
src/codegen/{wasm.rs => wasm/mod.rs} | 2 +- src/interpreter/mod.rs | 1 + src/interpreter/vm/arithmetic/mod.rs | 46 ++++++++ src/interpreter/vm/comparison/mod.rs | 58 ++++++++++ src/interpreter/vm/mod.rs | 94 ++++++++++++++++ src/interpreter/vm/stack/dump.rs | 14 +++ src/interpreter/vm/stack/dup.rs | 12 ++ src/interpreter/vm/stack/mod.rs | 4 + src/interpreter/vm/stack/pop.rs | 10 ++ src/interpreter/vm/stack/push.rs | 10 ++ src/interpreter/vm/value.rs | 63 +++++++++++ src/lexer/tokens.rs | 99 +++++++++++++--- src/lib.rs | 1 + src/main.rs | 30 ++++- src/parser/parse.rs | 122 +++++++++++++++----- 17 files changed, 678 insertions(+), 54 deletions(-) create mode 100644 src/codegen/vm/mod.rs rename src/codegen/{wasm.rs => wasm/mod.rs} (82%) create mode 100644 src/interpreter/mod.rs create mode 100644 src/interpreter/vm/arithmetic/mod.rs create mode 100644 src/interpreter/vm/comparison/mod.rs create mode 100644 src/interpreter/vm/mod.rs create mode 100644 src/interpreter/vm/stack/dump.rs create mode 100644 src/interpreter/vm/stack/dup.rs create mode 100644 src/interpreter/vm/stack/mod.rs create mode 100644 src/interpreter/vm/stack/pop.rs create mode 100644 src/interpreter/vm/stack/push.rs create mode 100644 src/interpreter/vm/value.rs diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 15db5ca..66a06cc 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,15 +1,17 @@ use crate::parser::parse::AstNode; pub trait CodeGenerator { - fn generate(&self, ast: AstNode) -> anyhow::Result<()>; + fn generate(&mut self, ast: AstNode) -> anyhow::Result<()>; } pub enum CodeGeneratorTarget { LLVM, Wasm, + VirtualMachine, } pub mod llvm; +pub mod vm; pub mod wasm; // Choose the code generator based on the target @@ -17,5 +19,6 @@ pub fn code_generator(target: CodeGeneratorTarget) -> Box { match target { CodeGeneratorTarget::LLVM => Box::::default(), CodeGeneratorTarget::Wasm => Box::::default(), + CodeGeneratorTarget::VirtualMachine => Box::::default(), } } diff --git 
a/src/codegen/vm/mod.rs b/src/codegen/vm/mod.rs new file mode 100644 index 0000000..213ef9b --- /dev/null +++ b/src/codegen/vm/mod.rs @@ -0,0 +1,161 @@ +use serde::{Deserialize, Serialize}; + +use crate::{ + lexer::tokens::{ArithmeticOperators, ComparisonOperators, StackOperators, Token}, + parser::parse::AstNode, +}; +use std::{collections::HashMap, fs::File}; + +use super::CodeGenerator; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum ByteCode { + // Stack manipulation + PushInt(i32), + PushFloat(f32), + PushStr(String), + PushBool(bool), + Pop, + Dump, + Dup, + + // Arithmetic + Add, + Sub, + Mul, + Div, + Mod, + + // Comparison + Eq, + Neq, + Lt, + Gt, + Leq, + Geq, +} + +pub struct VMCodeGenerator; + +impl VMCodeGenerator { + pub fn new() -> Self { + Self {} + } + + pub fn generate_two_children_code( + left: &AstNode, + right: &AstNode, + opcode: ByteCode, + ) -> anyhow::Result> { + let mut bytecode = VMCodeGenerator::generate_byte_code(left)?; + bytecode.append(&mut VMCodeGenerator::generate_byte_code(right)?); + bytecode.push(opcode); + Ok(bytecode) + } + + pub fn generate_byte_code(ast: &AstNode) -> anyhow::Result> { + match ast.token.clone() { + Token::Program => { + let mut bytecode = vec![]; + + for child in ast.children.iter() { + bytecode.append(&mut VMCodeGenerator::generate_byte_code(child)?); + } + + Ok(bytecode) + } + Token::Integer(value) => Ok(vec![ByteCode::PushInt(value)]), + Token::Float(value) => Ok(vec![ByteCode::PushFloat(value)]), + Token::String(value) => Ok(vec![ByteCode::PushStr(value)]), + Token::StackOps(operator) => match operator { + StackOperators::Dump => Ok(vec![ByteCode::Dump]), + StackOperators::Dup => Ok(vec![ByteCode::Dup]), + _ => Err(anyhow::anyhow!( + "Currently unsupported token: {:?}", + ast.token + )), + }, + Token::ArithmeticOp(operator) => { + use ArithmeticOperators::*; + use ByteCode::*; + + if let Some(opcode) = vec![ + (Plus, Add), + (Times, Mul), + (Minus, Sub), + (Divide, Div), + (Modulo, Mod), + ] 
+ .into_iter() + .collect::>() + .get(&operator) + { + Self::generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) + } else { + Err(anyhow::anyhow!( + "Currently unsupported token: {:?}", + ast.token + )) + } + } + Token::ComparisonOp(operator) => { + use ByteCode::*; + use ComparisonOperators::*; + + if let Some(opcode) = vec![ + (EqualTo, Eq), + (NotEqualTo, Neq), + (LessThan, Lt), + (GreaterThan, Gt), + (LessThanOrEqualTo, Leq), + (GreaterThanOrEqualTo, Geq), + ] + .into_iter() + .collect::>() + .get(&operator) + { + Self::generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) + } else { + Err(anyhow::anyhow!( + "Currently unsupported token: {:?}", + ast.token + )) + } + } + _ => Err(anyhow::anyhow!( + "Currently unsupported token: {:?}", + ast.token + )), + } + } + + pub fn encode_byte_code(bytecode: Vec) -> anyhow::Result<()> { + let encoded: Vec = bincode::serialize(&bytecode).unwrap(); + + use std::io::Write; + + let mut file = + File::create("bytecode.bin").map_err(|e| anyhow::anyhow!("Error creating file: {}", e))?; + file + .write_all(&encoded) + .map_err(|e| anyhow::anyhow!("Error writing to file: {}", e))?; + + Ok(()) + } +} + +impl Default for VMCodeGenerator { + fn default() -> Self { + Self::new() + } +} + +impl CodeGenerator for VMCodeGenerator { + fn generate(&mut self, ast: AstNode) -> anyhow::Result<()> { + let bytecode = VMCodeGenerator::generate_byte_code(&ast)?; + println!("{:?}", bytecode); + VMCodeGenerator::encode_byte_code(bytecode)?; + + Ok(()) + } +} diff --git a/src/codegen/wasm.rs b/src/codegen/wasm/mod.rs similarity index 82% rename from src/codegen/wasm.rs rename to src/codegen/wasm/mod.rs index 5ece9c3..e139181 100644 --- a/src/codegen/wasm.rs +++ b/src/codegen/wasm/mod.rs @@ -17,7 +17,7 @@ impl Default for WasmCodeGenerator { } impl CodeGenerator for WasmCodeGenerator { - fn generate(&self, ast: AstNode) -> anyhow::Result<()> { + fn generate(&mut self, _ast: AstNode) -> 
anyhow::Result<()> { Ok(()) } } diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs new file mode 100644 index 0000000..e44013b --- /dev/null +++ b/src/interpreter/mod.rs @@ -0,0 +1 @@ +pub mod vm; diff --git a/src/interpreter/vm/arithmetic/mod.rs b/src/interpreter/vm/arithmetic/mod.rs new file mode 100644 index 0000000..c44935c --- /dev/null +++ b/src/interpreter/vm/arithmetic/mod.rs @@ -0,0 +1,46 @@ +use super::value::Value; + +pub struct ArithmeticInstruction; + +pub enum ArithmeticMethod { + Add, + Sub, + Mul, + Div, + Mod, +} + +impl ArithmeticInstruction { + pub fn eval(stack: &mut Vec, instruction: ArithmeticMethod) -> anyhow::Result<()> { + let rhs = stack.pop().ok_or(anyhow::anyhow!("Add on empty stack"))?; + let lhs = stack.pop().ok_or(anyhow::anyhow!("Add on empty stack"))?; + + match (lhs.clone(), rhs.clone()) { + (Value::Int(lhs), Value::Int(rhs)) => match instruction { + ArithmeticMethod::Add => stack.push(Value::Int(lhs + rhs)), + ArithmeticMethod::Sub => stack.push(Value::Int(lhs - rhs)), + ArithmeticMethod::Mul => stack.push(Value::Int(lhs * rhs)), + ArithmeticMethod::Div => { + if rhs == 0 { + return Err(anyhow::anyhow!("Divide by zero")); + } + stack.push(Value::Int(lhs / rhs)) + } + ArithmeticMethod::Mod => { + if rhs == 0 { + return Err(anyhow::anyhow!("Modulo by zero")); + } + stack.push(Value::Int(lhs % rhs)) + } + }, + _ => { + return Err(anyhow::anyhow!(format!( + "Add on non-integers: {} + {}", + lhs, rhs + ))); + } + } + + Ok(()) + } +} diff --git a/src/interpreter/vm/comparison/mod.rs b/src/interpreter/vm/comparison/mod.rs new file mode 100644 index 0000000..fd996ca --- /dev/null +++ b/src/interpreter/vm/comparison/mod.rs @@ -0,0 +1,58 @@ +use super::value::Value; + +pub struct ComparisonInstruction; + +pub enum ComparisonMethod { + LessThan, + LessThanEqual, + GreaterThan, + GreaterThanEqual, + Equal, + NotEqual, +} + +impl ComparisonInstruction { + fn push_into_stack bool>( + stack: &mut Vec, + comparison: F, + ) -> 
anyhow::Result<()> { + let rhs = stack + .pop() + .ok_or(anyhow::anyhow!("Comparison on empty stack"))?; + let lhs = stack + .pop() + .ok_or(anyhow::anyhow!("Comparison on empty stack"))?; + + let result = comparison(lhs, rhs); + + stack.push(Value::Bool(result)); + + Ok(()) + } + + pub fn eval(stack: &mut Vec, instruction: ComparisonMethod) -> anyhow::Result<()> { + match instruction { + ComparisonMethod::LessThan => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs < rhs)) + } + ComparisonMethod::LessThanEqual => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs <= rhs)) + } + ComparisonMethod::GreaterThan => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs > rhs)) + } + ComparisonMethod::GreaterThanEqual => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs >= rhs)) + } + ComparisonMethod::Equal => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs == rhs)) + } + ComparisonMethod::NotEqual => { + Self::push_into_stack(stack, |lhs: Value, rhs: Value| (lhs != rhs)) + } + } + .map_err(|err| anyhow::anyhow!(format!("Error while comparing: {}", err)))?; + + Ok(()) + } +} diff --git a/src/interpreter/vm/mod.rs b/src/interpreter/vm/mod.rs new file mode 100644 index 0000000..84ad013 --- /dev/null +++ b/src/interpreter/vm/mod.rs @@ -0,0 +1,94 @@ +use std::{fs::File, io::Read}; + +use crate::codegen::vm::ByteCode; + +use self::{ + arithmetic::{ArithmeticInstruction, ArithmeticMethod}, + comparison::{ComparisonInstruction, ComparisonMethod}, + stack::{dump::DumpInstruction, dup::DupInstruction, pop::PopInstruction, push::PushInstruction}, + value::Value, +}; + +pub mod arithmetic; +pub mod comparison; +pub mod stack; +pub mod value; + +pub struct VMInterpreter; + +impl VMInterpreter { + pub fn open(bytecode_file: &str) -> anyhow::Result> { + let mut file = + File::open(bytecode_file).map_err(|e| anyhow::anyhow!("Error opening file: {}", e))?; + let mut encoded = Vec::new(); + file + .read_to_end(&mut encoded) 
+ .map_err(|e| anyhow::anyhow!("Error reading file: {}", e))?; + + bincode::deserialize(&encoded).map_err(|e| anyhow::anyhow!("Error deserializing: {}", e)) + } + + pub fn run(bytecode_file: &str) -> anyhow::Result<()> { + let bytecode = VMInterpreter::open(bytecode_file)?; + // println!("{:?}", bytecode); + + VM::new().execute(&bytecode)?; + + Ok(()) + } +} + +pub struct VM { + stack: Vec, +} + +impl VM { + /// Creates a new [`VM`]. + pub fn new() -> Self { + Self { stack: vec![] } + } + + pub fn execute(&mut self, bytecode: &[ByteCode]) -> anyhow::Result<()> { + for instruction in bytecode { + match instruction { + // Stack + ByteCode::PushInt(value) => PushInstruction::eval(&mut self.stack, *value)?, + ByteCode::PushFloat(value) => PushInstruction::eval(&mut self.stack, *value)?, + ByteCode::PushStr(value) => PushInstruction::eval(&mut self.stack, value.clone())?, + ByteCode::PushBool(value) => PushInstruction::eval(&mut self.stack, *value)?, + ByteCode::Dump => DumpInstruction::eval(&mut self.stack)?, + ByteCode::Dup => DupInstruction::eval(&mut self.stack)?, + ByteCode::Pop => PopInstruction::eval(&mut self.stack)?, + + // Arithmetic + ByteCode::Add => ArithmeticInstruction::eval(&mut self.stack, ArithmeticMethod::Add)?, + ByteCode::Sub => ArithmeticInstruction::eval(&mut self.stack, ArithmeticMethod::Sub)?, + ByteCode::Mul => ArithmeticInstruction::eval(&mut self.stack, ArithmeticMethod::Mul)?, + ByteCode::Div => ArithmeticInstruction::eval(&mut self.stack, ArithmeticMethod::Div)?, + ByteCode::Mod => ArithmeticInstruction::eval(&mut self.stack, ArithmeticMethod::Mod)?, + + // Comparison + ByteCode::Eq => ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::Equal)?, + ByteCode::Neq => ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::NotEqual)?, + ByteCode::Lt => ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::LessThan)?, + ByteCode::Leq => { + ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::LessThanEqual)? 
+ } + ByteCode::Gt => { + ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::GreaterThan)? + } + ByteCode::Geq => { + ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::GreaterThanEqual)? + } + } + } + + Ok(()) + } +} + +impl Default for VM { + fn default() -> Self { + Self::new() + } +} diff --git a/src/interpreter/vm/stack/dump.rs b/src/interpreter/vm/stack/dump.rs new file mode 100644 index 0000000..1f746cd --- /dev/null +++ b/src/interpreter/vm/stack/dump.rs @@ -0,0 +1,14 @@ +use crate::interpreter::vm::value::Value; + +pub struct DumpInstruction; + +impl DumpInstruction { + pub fn eval(stack: &mut Vec) -> anyhow::Result<()> { + // take the top of the stack and print it + let top = stack.pop().ok_or(anyhow::anyhow!("Dump on empty stack"))?; + + println!("{}", top); + + Ok(()) + } +} diff --git a/src/interpreter/vm/stack/dup.rs b/src/interpreter/vm/stack/dup.rs new file mode 100644 index 0000000..d9df049 --- /dev/null +++ b/src/interpreter/vm/stack/dup.rs @@ -0,0 +1,12 @@ +use crate::interpreter::vm::value::Value; + +pub struct DupInstruction; + +impl DupInstruction { + pub fn eval(stack: &mut Vec) -> anyhow::Result<()> { + let top = stack.last().ok_or(anyhow::anyhow!("Dup on empty stack"))?; + stack.push(top.clone()); + + Ok(()) + } +} diff --git a/src/interpreter/vm/stack/mod.rs b/src/interpreter/vm/stack/mod.rs new file mode 100644 index 0000000..ef15035 --- /dev/null +++ b/src/interpreter/vm/stack/mod.rs @@ -0,0 +1,4 @@ +pub mod pop; +pub mod push; +pub mod dup; +pub mod dump; diff --git a/src/interpreter/vm/stack/pop.rs b/src/interpreter/vm/stack/pop.rs new file mode 100644 index 0000000..55bd396 --- /dev/null +++ b/src/interpreter/vm/stack/pop.rs @@ -0,0 +1,10 @@ +use crate::interpreter::vm::value::Value; + +pub struct PopInstruction; + +impl PopInstruction { + pub fn eval(stack: &mut Vec) -> anyhow::Result<()> { + stack.pop().ok_or(anyhow::anyhow!("Pop on empty stack"))?; + Ok(()) + } +} diff --git 
a/src/interpreter/vm/stack/push.rs b/src/interpreter/vm/stack/push.rs new file mode 100644 index 0000000..8a4452f --- /dev/null +++ b/src/interpreter/vm/stack/push.rs @@ -0,0 +1,10 @@ +use crate::interpreter::vm::value::Value; + +pub struct PushInstruction; + +impl PushInstruction { + pub fn eval>(stack: &mut Vec, value: T) -> anyhow::Result<()> { + stack.push(value.into()); + Ok(()) + } +} diff --git a/src/interpreter/vm/value.rs b/src/interpreter/vm/value.rs new file mode 100644 index 0000000..c0272b4 --- /dev/null +++ b/src/interpreter/vm/value.rs @@ -0,0 +1,63 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum Value { + Int(i32), + Float32(f32), + Float64(f64), + Bool(bool), + Str(String), +} + +impl PartialOrd for Value { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Value::Int(lhs), Value::Int(rhs)) => lhs.partial_cmp(rhs), + (Value::Float32(lhs), Value::Float32(rhs)) => lhs.partial_cmp(rhs), + (Value::Float64(lhs), Value::Float64(rhs)) => lhs.partial_cmp(rhs), + (Value::Str(lhs), Value::Str(rhs)) => lhs.partial_cmp(rhs), + (Value::Bool(lhs), Value::Bool(rhs)) => lhs.partial_cmp(rhs), + _ => None, + } + } +} + +impl From for Value { + fn from(value: i32) -> Self { + Self::Int(value) + } +} + +impl From for Value { + fn from(value: f32) -> Self { + Self::Float32(value) + } +} + +impl From for Value { + fn from(value: f64) -> Self { + Self::Float64(value) + } +} + +impl From for Value { + fn from(value: bool) -> Self { + Self::Bool(value) + } +} + +impl From for Value { + fn from(value: String) -> Self { + Self::Str(value) + } +} + +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Value::Int(value) => write!(f, "{}", value), + Value::Float32(value) => write!(f, "{}", value), + Value::Float64(value) => write!(f, "{}", value), + Value::Bool(value) => write!(f, "{}", value), + Value::Str(value) => write!(f, "{}", value), + } + } +} diff --git 
a/src/lexer/tokens.rs b/src/lexer/tokens.rs index b8a3da7..6737920 100644 --- a/src/lexer/tokens.rs +++ b/src/lexer/tokens.rs @@ -1,19 +1,27 @@ use logos::{Lexer, Logos}; use std::fmt::Display; -fn def_type(lex: &mut Lexer) -> Option<&'static str> { +#[derive(Debug, Clone, PartialEq)] +pub enum Type { + I32, + I64, + F32, + F64, +} + +fn def_type(lex: &mut Lexer) -> Option { let slice = lex.slice(); let slice = &slice[4..slice.len() - 1]; match slice { - "i32" => Some("i32"), - "i64" => Some("i64"), - "f32" => Some("f32"), - "f64" => Some("f64"), + "i32" => Some(Type::I32), + "i64" => Some(Type::I64), + "f32" => Some(Type::F32), + "f64" => Some(Type::F64), _ => None, } } -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ArithmeticOperators { Plus, Minus, @@ -34,25 +42,73 @@ fn parse_arithmetic_op(lex: &mut Lexer) -> Option { } } -#[derive(Logos, Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ComparisonOperators { + EqualTo, + NotEqualTo, + LessThan, + LessThanOrEqualTo, + GreaterThan, + GreaterThanOrEqualTo, +} + +fn parse_comparison_op(lex: &mut Lexer) -> Option { + let slice = lex.slice(); + match slice { + "=" => Some(ComparisonOperators::EqualTo), + "<>" => Some(ComparisonOperators::NotEqualTo), + "<" => Some(ComparisonOperators::LessThan), + "<=" => Some(ComparisonOperators::LessThanOrEqualTo), + ">" => Some(ComparisonOperators::GreaterThan), + ">=" => Some(ComparisonOperators::GreaterThanOrEqualTo), + _ => None, + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum StackOperators { + Drop, + Dup, + Dump, +} + +fn parse_stack_op(lex: &mut Lexer) -> Option { + let slice = lex.slice(); + match slice { + "drop" => Some(StackOperators::Drop), + "dup" => Some(StackOperators::Dup), + "dump" => Some(StackOperators::Dump), + _ => None, + } +} + +fn parse_to_string(lex: &mut Lexer) -> Option { + let slice = lex.slice(); + // remove the quotes + let slice = 
&slice[1..slice.len() - 1]; + Some(slice.to_string()) +} + +#[derive(Logos, Debug, Clone, PartialEq)] pub enum Token { #[regex(r"[ \t\n\f]+", logos::skip)] #[error] Error, /// Integer literals - #[regex(r"[0-9]+", |lex| lex.slice().parse())] + #[regex(r"[0-9]+", |lex| lex.slice().parse(), priority = 2)] #[regex(r"0[xX][0-9a-fA-F]+", |lex| i32::from_str_radix(&lex.slice()[2..], 16))] #[regex(r"0b[0-1]+", |lex| i32::from_str_radix(&lex.slice()[2..], 2))] #[regex(r"0o[0-7]+", |lex| i32::from_str_radix(&lex.slice()[2..], 8))] Integer(i32), /// Float literals - #[regex("[0-9]+\\.[0-9]+")] - #[regex("\\.[0-9]+")] - #[regex("[0-9]+\\.")] - #[regex(r"[0-9]+e[0-9]+")] - Float, + #[regex(r"[+-]?([0-9]*[.])?[0-9]+([eE][+-]?[0-9]+)?", |lex| lex.slice().parse(), priority = 1)] + Float(f32), + + /// String literals + #[regex(r#""([^"\\]|\\.)*""#, parse_to_string)] + String(String), /// Operators /// Plus, minus, times, divide, modulo @@ -60,8 +116,8 @@ pub enum Token { ArithmeticOp(ArithmeticOperators), /// Comparison operators - #[regex(r"=|<>|<=|>=|<|>")] - ComparisonOp, + #[regex(r"=|<>|<=|>=|<|>", parse_comparison_op)] + ComparisonOp(ComparisonOperators), /// Cast (::) #[regex(r"::")] @@ -70,8 +126,8 @@ pub enum Token { /// Keywords /// Stack Ops - #[regex(r"drop|dup")] - StackOps, + #[regex(r"drop|dup|dump", parse_stack_op)] + StackOps(StackOperators), // @ sign #[token("@")] @@ -101,7 +157,7 @@ pub enum Token { /// Def Type (def(i32)) #[regex("def\\((i32|i64|f32|f64)\\)", def_type)] - DefType(&'static str), + DefType(Type), /// Identifiers #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] @@ -110,6 +166,13 @@ pub enum Token { /// End of input #[regex(r"\$")] EndOfInput, + + /// Comments (\) + #[regex(r"\\.*", logos::skip)] + Comment, + + /// Decoy Program token + Program, } impl Display for Token { diff --git a/src/lib.rs b/src/lib.rs index e921b1c..30bd0ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod codegen; pub mod grammar; +pub mod interpreter; pub mod lexer; pub 
mod parser; pub mod semantic; diff --git a/src/main.rs b/src/main.rs index 9298d28..b91535a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,21 @@ use miette::Result as MietteResult; use rusted_pile::{ codegen::{self, CodeGeneratorTarget}, - grammar, lexer, + grammar, + interpreter::vm, + lexer, parser::SLR::SLR, }; use std::fs; -fn main() -> MietteResult<(), Box> { +#[allow(dead_code)] +fn generate() -> MietteResult<(), Box> { + // Get the file name from the command line arguments + let args: Vec = std::env::args().collect(); + let filename = &args[1]; + // Lexer - let lang_contents = fs::read_to_string("assets/lang/test.pile")?; + let lang_contents = fs::read_to_string(format!("assets/lang/{}.pile", filename))?; let tokens = lexer::generate::compute_tokens(&lang_contents)?; // Parser @@ -22,8 +29,21 @@ fn main() -> MietteResult<(), Box> { .ok_or("Failed to parse")?; // Codegen - println!("{}", abstract_syntax_tree); - codegen::code_generator(CodeGeneratorTarget::LLVM).generate(abstract_syntax_tree)?; + // println!("{}", abstract_syntax_tree); + codegen::code_generator(CodeGeneratorTarget::VirtualMachine).generate(abstract_syntax_tree)?; + // codegen::code_generator(CodeGeneratorTarget::LLVM).generate(abstract_syntax_tree)?; + + Ok(()) +} +fn consume() -> MietteResult<(), Box> { + vm::VMInterpreter::run("bytecode.bin")?; + + Ok(()) +} + +fn main() -> MietteResult<(), Box> { + generate()?; + consume()?; Ok(()) } diff --git a/src/parser/parse.rs b/src/parser/parse.rs index aef6add..ff7b14f 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -153,7 +153,7 @@ impl SLR { stack.push_back(StackItem::State(*state)); } Action::Accept => { - println!("Accept"); + // println!("Accept"); break; } Action::Error => { @@ -205,44 +205,75 @@ impl SLR { pub struct AstNode { pub symbol: Symbol, pub children: Vec, + pub token: Token, } -impl Display for AstNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "AST")?; - 
write_ast_node(f, self, "", true) - } +pub struct AstNodeIter<'a> { + nodes: Vec<&'a AstNode>, } -fn write_ast_node( - f: &mut fmt::Formatter, - node: &AstNode, - prefix: &str, - is_last: bool, -) -> fmt::Result { - let (node_prefix, child_prefix) = if is_last { - ("\x1B[33m└─\x1B[0m", " ") - } else { - ("\x1B[33m├─\x1B[0m", "\x1B[33m│ \x1B[0m") - }; +pub struct AstNodeDetailedIter<'a> { + stack: Vec<(&'a AstNode, usize, bool)>, // (node, depth, is_last) +} - writeln!(f, "{}{}{}", prefix, node_prefix, node.symbol)?; +impl<'a> AstNode { + pub fn iter(&'a self) -> AstNodeIter<'a> { + AstNodeIter { nodes: vec![self] } + } - let children = ast_node_children(node); - for (i, child) in children.iter().enumerate() { - write_ast_node( - f, - child, - &format!("{}{}", prefix, child_prefix), - i == children.len() - 1, - )?; + pub fn detailed_iter(&'a self) -> AstNodeDetailedIter<'a> { + AstNodeDetailedIter { + stack: vec![(self, 0, true)], + } } +} - Ok(()) +impl<'a> Iterator for AstNodeIter<'a> { + type Item = &'a AstNode; + + fn next(&mut self) -> Option { + let node = self.nodes.pop()?; + self.nodes.extend(node.children.iter().rev()); // reverse to maintain order due to stack nature of vec + Some(node) + } +} + +impl<'a> Iterator for AstNodeDetailedIter<'a> { + type Item = (&'a AstNode, usize, bool); + + fn next(&mut self) -> Option { + let (node, depth, is_last) = self.stack.pop()?; + + for (i, child) in node.children.iter().enumerate().rev() { + let is_last = i == 0; + self.stack.push((child, depth + 1, is_last)); + } + + Some((node, depth, is_last)) + } } -fn ast_node_children(node: &AstNode) -> Vec<&AstNode> { - node.children.iter().collect() +impl Display for AstNode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "AST")?; + for (node, depth, is_last) in self.detailed_iter() { + let prefix = if depth == 0 { + "".to_string() + } else { + format!( + "{}{}", + " ".repeat(depth - 1), + if is_last { + "\x1B[33m└─\x1B[0m" + } else { + 
"\x1B[33m├─\x1B[0m" + } + ) + }; + writeln!(f, "{}{}", prefix, node.symbol)?; + } + Ok(()) + } } fn parse_ast(node: &ParseTreeNode) -> Vec { @@ -262,10 +293,25 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { } } else if let ParseTreeNode::Terminal(token) = current_node { match token { + Token::String(string) => { + stack.push(AstNode { + symbol: Symbol::Terminal(string.to_string()), + children: Vec::new(), + token: token.clone(), + }); + } Token::Integer(integer) => { stack.push(AstNode { symbol: Symbol::Terminal(integer.to_string()), children: Vec::new(), + token: token.clone(), + }); + } + Token::Float(float) => { + stack.push(AstNode { + symbol: Symbol::Terminal(float.to_string()), + children: Vec::new(), + token: token.clone(), }); } Token::ArithmeticOp { .. } => { @@ -274,6 +320,23 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { stack.push(AstNode { symbol: Symbol::Terminal(token.to_string()), children: vec![left, right], + token: token.clone(), + }); + } + Token::StackOps { .. } => { + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: Vec::new(), + token: token.clone(), + }); + } + Token::ComparisonOp { .. 
} => { + let right = stack.pop().unwrap(); + let left = stack.pop().unwrap(); + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: vec![left, right], + token: token.clone(), }); } _ => {} @@ -292,6 +355,7 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { stack = vec![AstNode { symbol: Symbol::NonTerminal("R".to_string()), children: stack, + token: Token::Program, }]; stack From e2b29f6369f4ae7617c87a31583afe8c2907138a Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Mon, 17 Jul 2023 21:21:42 -0300 Subject: [PATCH 06/10] chore(Cargo.toml): update logos dependency to version 0.12 feat(Cargo.toml): add serde and bincode dependencies feat(Cargo.toml): add once_cell dependency feat(assets/glc/lang.glc): add support for string literals in feat(assets/lang/arithmetic.pile): add arithmetic operations in Pile feat(assets/lang/conditional_and_branching.pile): add conditional and branching operations in Pile feat(assets/lang/test.pile): add test code in Pile feat(assets/lang/write.pile): add write code in Pile feat(justfile): add run and clean aliases --- Cargo.toml | 5 ++++- assets/glc/lang.glc | 1 + assets/lang/arithmetic.pile | 11 +++++++++++ assets/lang/conditional_and_branching.pile | 5 +++++ assets/lang/test.pile | 4 ++-- assets/lang/write.pile | 3 +++ justfile | 9 +++++++++ 7 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 assets/lang/arithmetic.pile create mode 100644 assets/lang/conditional_and_branching.pile create mode 100644 assets/lang/write.pile create mode 100644 justfile diff --git a/Cargo.toml b/Cargo.toml index 0380778..f446b36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,12 +6,15 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -logos = "0.12.1" +logos = "0.12" miette = { version = "5.5.0", features = ["fancy"] } thiserror = "1.0.38" anyhow = "1.0.69" singleton-manager = "0.1.4" +serde = { version = "1.0", features = ["derive"] } 
+bincode = "1.3.3" inkwell = { version = "0.2", features = ["llvm15-0"] } llvm-sys-150 = { package = "llvm-sys", version = "150.1.0", features = ["prefer-dynamic"] } lazy_static = "1.4" +once_cell = "1.17" diff --git a/assets/glc/lang.glc b/assets/glc/lang.glc index e4f9685..75885bc 100644 --- a/assets/glc/lang.glc +++ b/assets/glc/lang.glc @@ -3,6 +3,7 @@ -> ArithmeticOp | ComparisonOp | Identifier + | String | | | diff --git a/assets/lang/arithmetic.pile b/assets/lang/arithmetic.pile new file mode 100644 index 0000000..daf2dbc --- /dev/null +++ b/assets/lang/arithmetic.pile @@ -0,0 +1,11 @@ +\ vim: ft=forth + +\ Arithmetic operations in Pile + +1 2 + dump \ => 3 +2 2 * dump \ => 4 +10 2 / dump \ => 2 +10 3 - dump \ => 7 +10 3 % dump \ => 1 + +0 \ Return diff --git a/assets/lang/conditional_and_branching.pile b/assets/lang/conditional_and_branching.pile new file mode 100644 index 0000000..5b30acd --- /dev/null +++ b/assets/lang/conditional_and_branching.pile @@ -0,0 +1,5 @@ +\ vim: ft=forth + +1 2 < dump +2 1 < dump +2.1 1 > dump diff --git a/assets/lang/test.pile b/assets/lang/test.pile index daff1ca..4a32209 100644 --- a/assets/lang/test.pile +++ b/assets/lang/test.pile @@ -1,2 +1,2 @@ -1 2 3 * + -1 0x00 +1 2 + dump +"oi" 2 + dump diff --git a/assets/lang/write.pile b/assets/lang/write.pile new file mode 100644 index 0000000..1c826b8 --- /dev/null +++ b/assets/lang/write.pile @@ -0,0 +1,3 @@ +\ vim: ft=forth + +"Hello, %s\n" "world" printf diff --git a/justfile b/justfile new file mode 100644 index 0000000..7463eeb --- /dev/null +++ b/justfile @@ -0,0 +1,9 @@ +# vim: set ft=make ts=2 sw=2 noet: + +alias r := run + +run file: + cargo run --release -- {{file}} + +clean: + rm -rf output output.ll From 0252e2ae7d77240907525d7fb7df4114cdbfdce7 Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Mon, 17 Jul 2023 21:22:22 -0300 Subject: [PATCH 07/10] chore(.gitignore): add bytecode.bin and *.lock to the ignore list --- .gitignore | 2 + Cargo.lock | 645 
----------------------------------------------------- 2 files changed, 2 insertions(+), 645 deletions(-) delete mode 100644 Cargo.lock diff --git a/.gitignore b/.gitignore index ea8c4bf..f12fd8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +bytecode.bin +*.lock diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index d66d5cd..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,645 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "addr2line" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - -[[package]] -name = "anyhow" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "backtrace" -version = "0.3.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "beef" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "cc" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "getrandom" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.27.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "inkwell" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f4fcb4a4fa0b8f7b4178e24e6317d6f8b95ab500d8e6e1bd4283b6860e369c1" -dependencies = [ - "either", - "inkwell_internals", - "libc", - "llvm-sys", - "once_cell", - "parking_lot", -] - -[[package]] -name = "inkwell_internals" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b185e7d068d6820411502efa14d8fbf010750485399402156b72dd2a548ef8e9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.16", -] - -[[package]] -name = "is_ci" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.139" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" - -[[package]] -name = "llvm-sys" -version = "150.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58b2ce8adf5b4b7f4652994f522ea2639ad388f6ab6b85b229750decf2782d8a" -dependencies = [ - "cc", - "lazy_static", - "libc", - "regex", - "semver", -] - -[[package]] 
-name = "lock_api" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "logos" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1" -dependencies = [ - "logos-derive", -] - -[[package]] -name = "logos-derive" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c" -dependencies = [ - "beef", - "fnv", - "proc-macro2", - "quote", - "regex-syntax", - "syn 1.0.107", -] - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "miette" -version = "5.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4afd9b301defa984bbdbe112b4763e093ed191750a0d914a78c1106b2d0fe703" -dependencies = [ - "atty", - "backtrace", - "miette-derive", - "once_cell", - "owo-colors", - "supports-color", - "supports-hyperlinks", - "supports-unicode", - "terminal_size", - "textwrap", - "thiserror", - "unicode-width", -] - -[[package]] -name = "miette-derive" -version = "5.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97c2401ab7ac5282ca5c8b518a87635b1a93762b0b90b9990c509888eeccba29" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.107", -] - -[[package]] -name = "miniz_oxide" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" -dependencies = [ - "adler", -] - -[[package]] -name = "object" -version = "0.30.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" - -[[package]] -name = "owo-colors" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "proc-macro2" -version = "1.0.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.6.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" - -[[package]] -name = "rustc-demangle" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" - -[[package]] -name = "rusted-pile" -version = "0.1.0" -dependencies = [ - "anyhow", - "inkwell", - "lazy_static", - "llvm-sys", - "logos", - "miette", - "singleton-manager", - "thiserror", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" - -[[package]] -name = "singleton-manager" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5c61bd18b12d0cfef0d2fdd8cf0ae8e177be98c9b56dceeacf12f692e9192" -dependencies = [ - "uuid", -] - -[[package]] -name = "smallvec" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" - -[[package]] -name = "smawk" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" - -[[package]] -name = "supports-color" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" -dependencies = [ - "atty", 
- "is_ci", -] - -[[package]] -name = "supports-hyperlinks" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "590b34f7c5f01ecc9d78dba4b3f445f31df750a67621cf31626f3b7441ce6406" -dependencies = [ - "atty", -] - -[[package]] -name = "supports-unicode" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8b945e45b417b125a8ec51f1b7df2f8df7920367700d1f98aedd21e5735f8b2" -dependencies = [ - "atty", -] - -[[package]] -name = "syn" -version = "1.0.107" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "textwrap" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" -dependencies = [ - "smawk", - "unicode-linebreak", - "unicode-width", -] - -[[package]] -name = "thiserror" -version = "1.0.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" -dependencies = [ 
- "proc-macro2", - "quote", - "syn 1.0.107", -] - -[[package]] -name = "unicode-ident" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" - -[[package]] -name = "unicode-linebreak" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137" -dependencies = [ - "hashbrown", - "regex", -] - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "uuid" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" -dependencies = [ - "getrandom", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - 
-[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" From e6f155aa4bb01666d6fccf6dfcb405bc211e6611 Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Tue, 18 Jul 2023 20:24:56 -0300 Subject: [PATCH 08/10] fix(conditional_and_branching.pile): fix indentation and comments feat(conditional_and_branching.pile): add if-else-end block fix(mod.rs): fix generate_byte_code function to use self instead of Self feat(mod.rs): add support for JumpIfNotTrue, Jump, and Ignore bytecodes fix(parse.rs): fix parse_ast function to return MietteResult --- assets/lang/conditional_and_branching.pile | 14 +++- src/codegen/vm/mod.rs | 87 +++++++++++++++++++--- src/interpreter/vm/mod.rs | 33 +++++++- src/parser/parse.rs | 37 ++++++++- 4 files changed, 150 insertions(+), 21 deletions(-) diff --git a/assets/lang/conditional_and_branching.pile b/assets/lang/conditional_and_branching.pile index 5b30acd..147bebd 100644 --- a/assets/lang/conditional_and_branching.pile +++ b/assets/lang/conditional_and_branching.pile @@ -1,5 +1,13 @@ \ vim: ft=forth -1 2 < dump -2 1 < dump -2.1 1 > dump +\ 1 2 < dump +\ 2 1 < dump +\ 2.1 1 > dump + +2 1 < if + 1 dump +else + 2 dump +end + +3 dump diff --git a/src/codegen/vm/mod.rs b/src/codegen/vm/mod.rs index 213ef9b..131cb16 100644 --- a/src/codegen/vm/mod.rs +++ b/src/codegen/vm/mod.rs @@ -33,36 +33,54 @@ pub enum ByteCode { Gt, Leq, Geq, + + // Branching + JumpIfNotTrue(usize), + Jump(usize), + + // Ignore + Ignore, } -pub struct VMCodeGenerator; +pub struct VMCodeGenerator { + branching_blocks: Vec, + instructions_count: usize, + bytecode: Vec, +} impl VMCodeGenerator { pub fn new() -> Self { - Self {} + Self { + branching_blocks: vec![], + instructions_count: 0, + bytecode: vec![], + } } pub fn generate_two_children_code( + &mut self, left: &AstNode, right: &AstNode, opcode: ByteCode, ) -> anyhow::Result> { 
- let mut bytecode = VMCodeGenerator::generate_byte_code(left)?; - bytecode.append(&mut VMCodeGenerator::generate_byte_code(right)?); + let mut bytecode = self.generate_byte_code(left)?; + bytecode.append(&mut self.generate_byte_code(right)?); bytecode.push(opcode); + self.instructions_count += 2; Ok(bytecode) } - pub fn generate_byte_code(ast: &AstNode) -> anyhow::Result> { + pub fn generate_byte_code(&mut self, ast: &AstNode) -> anyhow::Result> { match ast.token.clone() { Token::Program => { - let mut bytecode = vec![]; - for child in ast.children.iter() { - bytecode.append(&mut VMCodeGenerator::generate_byte_code(child)?); + self.instructions_count += 1; + let mut bytecode = self.generate_byte_code(child)?; + + self.bytecode.append(&mut bytecode); } - Ok(bytecode) + Ok(self.bytecode.clone()) } Token::Integer(value) => Ok(vec![ByteCode::PushInt(value)]), Token::Float(value) => Ok(vec![ByteCode::PushFloat(value)]), @@ -90,7 +108,7 @@ impl VMCodeGenerator { .collect::>() .get(&operator) { - Self::generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) + self.generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) } else { Err(anyhow::anyhow!( "Currently unsupported token: {:?}", @@ -114,7 +132,7 @@ impl VMCodeGenerator { .collect::>() .get(&operator) { - Self::generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) + self.generate_two_children_code(&ast.children[0], &ast.children[1], opcode.clone()) } else { Err(anyhow::anyhow!( "Currently unsupported token: {:?}", @@ -122,6 +140,50 @@ impl VMCodeGenerator { )) } } + Token::If => { + let mut bytecode = vec![]; + let mut condition = self.generate_byte_code(&ast.children[0])?; + + self.branching_blocks.push(self.instructions_count); + bytecode.append(&mut condition); + bytecode.push(ByteCode::JumpIfNotTrue(usize::MAX)); // Placeholder position for now + self.instructions_count += 1; + + Ok(bytecode) + } + Token::Else => { + if let 
Some(if_intruction_location) = self.branching_blocks.pop() { + if let ByteCode::JumpIfNotTrue(_) = &mut self.bytecode[if_intruction_location] { + self.bytecode[if_intruction_location] = + ByteCode::JumpIfNotTrue(self.instructions_count); + } + } else { + return Err(anyhow::anyhow!("Mismatched 'else'")); + } + self.branching_blocks.push(self.instructions_count - 1); + + Ok(vec![ByteCode::Jump(usize::MAX)]) + } + Token::End => { + if let Some(branch_intruction_location) = self.branching_blocks.pop() { + match &mut self.bytecode[branch_intruction_location] { + ByteCode::JumpIfNotTrue(_) => { + self.bytecode[branch_intruction_location] = + ByteCode::JumpIfNotTrue(self.instructions_count); + } + ByteCode::Jump(_) => { + self.bytecode[branch_intruction_location] = ByteCode::Jump(self.instructions_count); + } + _ => { + return Err(anyhow::anyhow!("Mismatched 'end' (1)")); + } + } + + Ok(vec![ByteCode::Ignore]) + } else { + Err(anyhow::anyhow!("Mismatched 'end' (2)")) + } + } _ => Err(anyhow::anyhow!( "Currently unsupported token: {:?}", ast.token @@ -152,7 +214,8 @@ impl Default for VMCodeGenerator { impl CodeGenerator for VMCodeGenerator { fn generate(&mut self, ast: AstNode) -> anyhow::Result<()> { - let bytecode = VMCodeGenerator::generate_byte_code(&ast)?; + let mut generator = VMCodeGenerator::new(); + let bytecode = generator.generate_byte_code(&ast)?; println!("{:?}", bytecode); VMCodeGenerator::encode_byte_code(bytecode)?; diff --git a/src/interpreter/vm/mod.rs b/src/interpreter/vm/mod.rs index 84ad013..560ea63 100644 --- a/src/interpreter/vm/mod.rs +++ b/src/interpreter/vm/mod.rs @@ -40,16 +40,30 @@ impl VMInterpreter { pub struct VM { stack: Vec, + instruction_counter: usize, } impl VM { /// Creates a new [`VM`]. 
pub fn new() -> Self { - Self { stack: vec![] } + Self { + stack: vec![], + instruction_counter: 0, + } } pub fn execute(&mut self, bytecode: &[ByteCode]) -> anyhow::Result<()> { - for instruction in bytecode { + while self.instruction_counter < bytecode.len() { + let instruction = &bytecode[self.instruction_counter]; + + println!( + "{}{:?}", + format!( + "{: <24} | ", + format!("[{}] {:?}", self.instruction_counter, instruction) + ), + self.stack + ); match instruction { // Stack ByteCode::PushInt(value) => PushInstruction::eval(&mut self.stack, *value)?, @@ -80,7 +94,22 @@ impl VM { ByteCode::Geq => { ComparisonInstruction::eval(&mut self.stack, ComparisonMethod::GreaterThanEqual)? } + + // Control flow + ByteCode::JumpIfNotTrue(new_counter) => { + if let Some(Value::Bool(value)) = self.stack.pop() { + if !value { + self.instruction_counter = *new_counter - 1; + } + } + } + ByteCode::Jump(new_counter) => { + self.instruction_counter = *new_counter - 1; + } + ByteCode::Ignore => {} } + + self.instruction_counter += 1; // Increment the instruction counter after each instruction } Ok(()) diff --git a/src/parser/parse.rs b/src/parser/parse.rs index ff7b14f..7bbe16e 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -175,7 +175,7 @@ impl SLR { } } - let ast = parse_ast(&parse_stack[0]); + let ast = parse_ast(&parse_stack[0])?; Ok(Some(ast[0].clone())) } @@ -276,7 +276,7 @@ impl Display for AstNode { } } -fn parse_ast(node: &ParseTreeNode) -> Vec { +fn parse_ast(node: &ParseTreeNode) -> MietteResult> { // Iterate for each through the leaves of the tree, if the leave is a Integer push it to the // stack, if it is a operator pop the last two elements of the stack and create a new node // with the operator and the two elements as children. Append the new node to the stack. 
@@ -339,7 +339,36 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { token: token.clone(), }); } - _ => {} + Token::If => { + let condition = stack.pop().unwrap(); + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: vec![condition], + token: token.clone(), + }); + } + Token::Else => { + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: vec![], + token: token.clone(), + }); + } + Token::End => { + stack.push(AstNode { + symbol: Symbol::Terminal(token.to_string()), + children: Vec::new(), + token: token.clone(), + }); + } + // Given an error saying that the token is currently not done + _ => { + return Err(ParseError::UnexpectedToken { + input: token.to_string(), + extension_src: (0, token.to_string().len() - 1), + advice: "Token is currently not supported".to_string(), + })?; + } } } @@ -358,5 +387,5 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { token: Token::Program, }]; - stack + Ok(stack) } From f9e63ef492eb225c53a4002d6898faec0717bbfc Mon Sep 17 00:00:00 2001 From: Daniel Boll Date: Tue, 18 Jul 2023 21:21:27 -0300 Subject: [PATCH 09/10] feat(cli): add compile and run subcommands feat(cli/compile.rs): implement compile subcommand feat(cli/run.rs): implement run subcommand feat(codegen/mod.rs): add filename parameter to generate method feat(codegen/vm/mod.rs): add filename parameter to encode_byte_code method fix(main.rs): use Cli struct to parse command line arguments and execute appropriate subcommand --- Cargo.toml | 1 + src/cli/compile.rs | 61 +++++++++++++++++++++++++++++++++++++++++ src/cli/mod.rs | 20 ++++++++++++++ src/cli/run.rs | 20 ++++++++++++++ src/codegen/llvm/mod.rs | 2 +- src/codegen/mod.rs | 2 +- src/codegen/vm/mod.rs | 10 +++---- src/codegen/wasm/mod.rs | 2 +- src/lib.rs | 1 + src/main.rs | 51 ++++++---------------------------- 10 files changed, 119 insertions(+), 51 deletions(-) create mode 100644 src/cli/compile.rs create mode 100644 src/cli/mod.rs create mode 100644 src/cli/run.rs diff 
--git a/Cargo.toml b/Cargo.toml index f446b36..d113f00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ anyhow = "1.0.69" singleton-manager = "0.1.4" serde = { version = "1.0", features = ["derive"] } bincode = "1.3.3" +clap = { version = "4.3.16", features = ["derive", "color"] } inkwell = { version = "0.2", features = ["llvm15-0"] } llvm-sys-150 = { package = "llvm-sys", version = "150.1.0", features = ["prefer-dynamic"] } diff --git a/src/cli/compile.rs b/src/cli/compile.rs new file mode 100644 index 0000000..4b7e7c2 --- /dev/null +++ b/src/cli/compile.rs @@ -0,0 +1,61 @@ +use clap::{Args, ValueEnum}; +use miette::Result as MietteResult; + +use crate::{ + codegen::{self, CodeGeneratorTarget}, + grammar, lexer, + parser::SLR::SLR, +}; + +use super::PileCompiler; + +#[allow(clippy::upper_case_acronyms)] +#[derive(ValueEnum, Clone)] +pub enum Codegen { + VM, + LLVM, +} + +#[derive(Args)] +pub struct Compile { + #[arg(required = true, short, long)] + pub filename: String, + + #[arg(short, long, default_value = "VM")] + pub codegen: Codegen, + + #[arg(short, long, default_value = "output")] + pub output: String, +} + +impl PileCompiler { + pub fn compile( + Compile { + filename, + codegen, + output, + }: &Compile, + ) -> MietteResult<(), Box> { + // Lexer + let lang_contents = std::fs::read_to_string(filename)?; + let tokens = lexer::generate::compute_tokens(&lang_contents)?; + + // Parser + let glc_contents = std::fs::read_to_string("assets/glc/lang.glc")?; + let mut glc = grammar::parser::parse(&glc_contents)?; + + glc.compute_follow_set().expand(); + + let abstract_syntax_tree = SLR::new(glc) + .parse(tokens, &lang_contents)? 
+ .ok_or("Failed to parse")?; + + match codegen { + Codegen::VM => codegen::code_generator(CodeGeneratorTarget::VirtualMachine), + Codegen::LLVM => codegen::code_generator(CodeGeneratorTarget::LLVM), + } + .generate(abstract_syntax_tree, output.clone())?; + + Ok(()) + } +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs new file mode 100644 index 0000000..031d25a --- /dev/null +++ b/src/cli/mod.rs @@ -0,0 +1,20 @@ +use clap::{Parser, Subcommand}; + +pub mod compile; +pub mod run; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +#[command(propagate_version = true)] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand)] +pub enum Commands { + Compile(compile::Compile), + Run(run::Run), +} + +pub struct PileCompiler; diff --git a/src/cli/run.rs b/src/cli/run.rs new file mode 100644 index 0000000..5a35145 --- /dev/null +++ b/src/cli/run.rs @@ -0,0 +1,20 @@ +use clap::Args; + +use crate::interpreter::vm; +use miette::Result as MietteResult; + +use super::PileCompiler; + +#[derive(Args)] +pub struct Run { + #[arg(required = true, short, long)] + pub filename: String, +} + +impl PileCompiler { + pub fn run(Run { filename }: &Run) -> MietteResult<(), Box> { + vm::VMInterpreter::run(filename)?; + + Ok(()) + } +} diff --git a/src/codegen/llvm/mod.rs b/src/codegen/llvm/mod.rs index 5f8ae01..5665de5 100644 --- a/src/codegen/llvm/mod.rs +++ b/src/codegen/llvm/mod.rs @@ -21,7 +21,7 @@ pub mod globals; pub struct LLVMCodeGenerator; impl CodeGenerator for LLVMCodeGenerator { - fn generate(&mut self, ast: AstNode) -> anyhow::Result<()> { + fn generate(&mut self, ast: AstNode, _filename: String) -> anyhow::Result<()> { // This trick is to ensure that stack is dropped before context let stack; { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 66a06cc..936ef6e 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,7 +1,7 @@ use crate::parser::parse::AstNode; pub trait CodeGenerator { - fn 
generate(&mut self, ast: AstNode) -> anyhow::Result<()>; + fn generate(&mut self, ast: AstNode, filename: String) -> anyhow::Result<()>; } pub enum CodeGeneratorTarget { diff --git a/src/codegen/vm/mod.rs b/src/codegen/vm/mod.rs index 131cb16..a3fb39a 100644 --- a/src/codegen/vm/mod.rs +++ b/src/codegen/vm/mod.rs @@ -191,13 +191,13 @@ impl VMCodeGenerator { } } - pub fn encode_byte_code(bytecode: Vec) -> anyhow::Result<()> { + pub fn encode_byte_code(bytecode: Vec, filename: String) -> anyhow::Result<()> { let encoded: Vec = bincode::serialize(&bytecode).unwrap(); use std::io::Write; - let mut file = - File::create("bytecode.bin").map_err(|e| anyhow::anyhow!("Error creating file: {}", e))?; + let mut file = File::create(format!("{filename}.bin")) + .map_err(|e| anyhow::anyhow!("Error creating file: {}", e))?; file .write_all(&encoded) .map_err(|e| anyhow::anyhow!("Error writing to file: {}", e))?; @@ -213,11 +213,11 @@ impl Default for VMCodeGenerator { } impl CodeGenerator for VMCodeGenerator { - fn generate(&mut self, ast: AstNode) -> anyhow::Result<()> { + fn generate(&mut self, ast: AstNode, filename: String) -> anyhow::Result<()> { let mut generator = VMCodeGenerator::new(); let bytecode = generator.generate_byte_code(&ast)?; println!("{:?}", bytecode); - VMCodeGenerator::encode_byte_code(bytecode)?; + VMCodeGenerator::encode_byte_code(bytecode, filename)?; Ok(()) } diff --git a/src/codegen/wasm/mod.rs b/src/codegen/wasm/mod.rs index e139181..fed7245 100644 --- a/src/codegen/wasm/mod.rs +++ b/src/codegen/wasm/mod.rs @@ -17,7 +17,7 @@ impl Default for WasmCodeGenerator { } impl CodeGenerator for WasmCodeGenerator { - fn generate(&mut self, _ast: AstNode) -> anyhow::Result<()> { + fn generate(&mut self, _ast: AstNode, _filename: String) -> anyhow::Result<()> { Ok(()) } } diff --git a/src/lib.rs b/src/lib.rs index 30bd0ac..f67b0e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,3 +6,4 @@ pub mod interpreter; pub mod lexer; pub mod parser; pub mod semantic; +pub 
mod cli; diff --git a/src/main.rs b/src/main.rs index b91535a..909e3b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,49 +1,14 @@ +use clap::Parser; use miette::Result as MietteResult; -use rusted_pile::{ - codegen::{self, CodeGeneratorTarget}, - grammar, - interpreter::vm, - lexer, - parser::SLR::SLR, -}; -use std::fs; +use rusted_pile::cli::{Cli, Commands, PileCompiler}; -#[allow(dead_code)] -fn generate() -> MietteResult<(), Box> { - // Get the file name from the command line arguments - let args: Vec = std::env::args().collect(); - let filename = &args[1]; - - // Lexer - let lang_contents = fs::read_to_string(format!("assets/lang/{}.pile", filename))?; - let tokens = lexer::generate::compute_tokens(&lang_contents)?; - - // Parser - let glc_contents = fs::read_to_string("assets/glc/lang.glc")?; - let mut glc = grammar::parser::parse(&glc_contents)?; - - glc.compute_follow_set().expand(); - - let abstract_syntax_tree = SLR::new(glc) - .parse(tokens, &lang_contents)? - .ok_or("Failed to parse")?; - - // Codegen - // println!("{}", abstract_syntax_tree); - codegen::code_generator(CodeGeneratorTarget::VirtualMachine).generate(abstract_syntax_tree)?; - // codegen::code_generator(CodeGeneratorTarget::LLVM).generate(abstract_syntax_tree)?; - - Ok(()) -} - -fn consume() -> MietteResult<(), Box> { - vm::VMInterpreter::run("bytecode.bin")?; +fn main() -> MietteResult<(), Box> { + let cli = Cli::parse(); - Ok(()) -} + match &cli.command { + Commands::Compile(opts) => PileCompiler::compile(opts)?, + Commands::Run(opts) => PileCompiler::run(opts)?, + } -fn main() -> MietteResult<(), Box> { - generate()?; - consume()?; Ok(()) } From 20c4c09d6680b294da95331d96a6805c643879bf Mon Sep 17 00:00:00 2001 From: Felipi Lima Matozinho Date: Wed, 19 Jul 2023 22:21:02 -0300 Subject: [PATCH 10/10] feat(semantic): add semantic validation Signed-off-by: Felipi Lima Matozinho --- Cargo.toml | 4 +- src/codegen/mod.rs | 6 +- src/lexer/tokens.rs | 14 ++ src/main.rs | 5 +- src/parser/parse.rs 
| 33 ++-- src/semantic/errors.rs | 104 +++++++++++ src/semantic/mod.rs | 113 ++++++++++++ src/semantic/stack_frame.rs | 8 + src/semantic/symbol_table.rs | 333 +++++++++++++++++++++++++++++++++++ 9 files changed, 602 insertions(+), 18 deletions(-) create mode 100644 src/semantic/errors.rs create mode 100644 src/semantic/stack_frame.rs create mode 100644 src/semantic/symbol_table.rs diff --git a/Cargo.toml b/Cargo.toml index f446b36..9c7b3be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ singleton-manager = "0.1.4" serde = { version = "1.0", features = ["derive"] } bincode = "1.3.3" -inkwell = { version = "0.2", features = ["llvm15-0"] } -llvm-sys-150 = { package = "llvm-sys", version = "150.1.0", features = ["prefer-dynamic"] } +# inkwell = { version = "0.2", features = ["llvm15-0"] } +# llvm-sys-150 = { package = "llvm-sys", version = "150.1.0", features = ["prefer-dynamic"] } lazy_static = "1.4" once_cell = "1.17" diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 66a06cc..feca9aa 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -5,19 +5,19 @@ pub trait CodeGenerator { } pub enum CodeGeneratorTarget { - LLVM, + // LLVM, Wasm, VirtualMachine, } -pub mod llvm; +// pub mod llvm; pub mod vm; pub mod wasm; // Choose the code generator based on the target pub fn code_generator(target: CodeGeneratorTarget) -> Box { match target { - CodeGeneratorTarget::LLVM => Box::::default(), + // CodeGeneratorTarget::LLVM => Box::::default(), CodeGeneratorTarget::Wasm => Box::::default(), CodeGeneratorTarget::VirtualMachine => Box::::default(), } diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs index 6737920..f8bfa3f 100644 --- a/src/lexer/tokens.rs +++ b/src/lexer/tokens.rs @@ -89,6 +89,15 @@ fn parse_to_string(lex: &mut Lexer) -> Option { Some(slice.to_string()) } +fn to_boolean(lex: &mut Lexer) -> Option { + let slice = lex.slice(); + match slice { + "true" => Some(true), + "false" => Some(false), + _ => None, + } +} + #[derive(Logos, Debug, 
Clone, PartialEq)] pub enum Token { #[regex(r"[ \t\n\f]+", logos::skip)] @@ -106,6 +115,11 @@ pub enum Token { #[regex(r"[+-]?([0-9]*[.])?[0-9]+([eE][+-]?[0-9]+)?", |lex| lex.slice().parse(), priority = 1)] Float(f32), + /// Boolean literals + #[token("true", to_boolean)] + #[token("false", to_boolean)] + Boolean(bool), + /// String literals #[regex(r#""([^"\\]|\\.)*""#, parse_to_string)] String(String), diff --git a/src/main.rs b/src/main.rs index b91535a..19e5cbb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use rusted_pile::{ interpreter::vm, lexer, parser::SLR::SLR, + semantic, }; use std::fs; @@ -15,7 +16,7 @@ fn generate() -> MietteResult<(), Box> { let filename = &args[1]; // Lexer - let lang_contents = fs::read_to_string(format!("assets/lang/{}.pile", filename))?; + let lang_contents = fs::read_to_string(filename)?; let tokens = lexer::generate::compute_tokens(&lang_contents)?; // Parser @@ -28,6 +29,8 @@ fn generate() -> MietteResult<(), Box> { .parse(tokens, &lang_contents)? 
.ok_or("Failed to parse")?; + semantic::SemanticAnalyzer::new(lang_contents).analyze(&abstract_syntax_tree)?; + // Codegen // println!("{}", abstract_syntax_tree); codegen::code_generator(CodeGeneratorTarget::VirtualMachine).generate(abstract_syntax_tree)?; diff --git a/src/parser/parse.rs b/src/parser/parse.rs index ff7b14f..16bec10 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -15,8 +15,8 @@ use super::SLR::SLR; #[derive(Debug)] pub enum ParseTreeNode { - Terminal(Token), - NonTerminal(Symbol, Vec), + Terminal(Token, (usize, usize)), + NonTerminal(Symbol, Vec, (usize, usize)), } impl fmt::Display for ParseTreeNode { @@ -33,8 +33,8 @@ fn write_node( is_last: bool, ) -> fmt::Result { let symbol = match node { - ParseTreeNode::Terminal(token) => format!("\x1B[1m{}\x1B[0m", token), - ParseTreeNode::NonTerminal(symbol, _) => symbol.to_string(), + ParseTreeNode::Terminal(token, _) => format!("\x1B[1m{}\x1B[0m", token), + ParseTreeNode::NonTerminal(symbol, _, _) => symbol.to_string(), }; let (node_prefix, child_prefix) = if is_last { ("\x1B[33m└─\x1B[0m", " ") @@ -43,8 +43,8 @@ fn write_node( }; writeln!(f, "{}{}{}", prefix, node_prefix, symbol)?; let child_count = match node { - ParseTreeNode::Terminal(_) => 0, - ParseTreeNode::NonTerminal(_, children) => children.len(), + ParseTreeNode::Terminal(..) => 0, + ParseTreeNode::NonTerminal(_, children, _) => children.len(), }; for (i, child) in node_children(node).iter().enumerate() { let child_prefix = format!("{}{}", prefix, child_prefix); @@ -56,8 +56,8 @@ fn write_node( fn node_children(node: &ParseTreeNode) -> Vec<&ParseTreeNode> { match node { - ParseTreeNode::Terminal(_) => vec![], - ParseTreeNode::NonTerminal(_, children) => children.iter().collect(), + ParseTreeNode::Terminal(..) 
=> vec![], + ParseTreeNode::NonTerminal(_, children, _) => children.iter().collect(), } } @@ -110,7 +110,7 @@ impl SLR { } else { Symbol::Terminal(current_token.to_string()) }; - parse_stack.push(ParseTreeNode::Terminal(current_token)); + parse_stack.push(ParseTreeNode::Terminal(current_token, span_to_tuple(span))); stack.push_back(StackItem::Symbol(symbol)); stack.push_back(StackItem::State(*shift_state)); next = input.next(); @@ -134,7 +134,7 @@ impl SLR { children.reverse(); - let node = ParseTreeNode::NonTerminal(lhs.clone(), children); + let node = ParseTreeNode::NonTerminal(lhs.clone(), children, span_to_tuple(span)); parse_stack.push(node); let top = if let StackItem::State(state) = stack.back().unwrap() { @@ -206,6 +206,7 @@ pub struct AstNode { pub symbol: Symbol, pub children: Vec, pub token: Token, + pub span: (usize, usize), } pub struct AstNodeIter<'a> { @@ -287,17 +288,18 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { let mut traverse_stack: Vec<&ParseTreeNode> = Vec::new(); let mut current_node = node; loop { - if let ParseTreeNode::NonTerminal(_, children) = current_node { + if let ParseTreeNode::NonTerminal(_, children, _) = current_node { for child in children.iter().rev() { traverse_stack.push(child); } - } else if let ParseTreeNode::Terminal(token) = current_node { + } else if let ParseTreeNode::Terminal(token, span) = current_node { match token { Token::String(string) => { stack.push(AstNode { symbol: Symbol::Terminal(string.to_string()), children: Vec::new(), token: token.clone(), + span: *span, }); } Token::Integer(integer) => { @@ -305,6 +307,7 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { symbol: Symbol::Terminal(integer.to_string()), children: Vec::new(), token: token.clone(), + span: *span, }); } Token::Float(float) => { @@ -312,6 +315,7 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { symbol: Symbol::Terminal(float.to_string()), children: Vec::new(), token: token.clone(), + span: *span, }); } Token::ArithmeticOp { .. 
} => { @@ -321,6 +325,7 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { symbol: Symbol::Terminal(token.to_string()), children: vec![left, right], token: token.clone(), + span: *span, }); } Token::StackOps { .. } => { @@ -328,15 +333,18 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { symbol: Symbol::Terminal(token.to_string()), children: Vec::new(), token: token.clone(), + span: *span, }); } Token::ComparisonOp { .. } => { let right = stack.pop().unwrap(); let left = stack.pop().unwrap(); + stack.push(AstNode { symbol: Symbol::Terminal(token.to_string()), children: vec![left, right], token: token.clone(), + span: *span, }); } _ => {} @@ -356,6 +364,7 @@ fn parse_ast(node: &ParseTreeNode) -> Vec { symbol: Symbol::NonTerminal("R".to_string()), children: stack, token: Token::Program, + span: (0, 0), }]; stack diff --git a/src/semantic/errors.rs b/src/semantic/errors.rs new file mode 100644 index 0000000..8dfbaaf --- /dev/null +++ b/src/semantic/errors.rs @@ -0,0 +1,104 @@ +use miette::Diagnostic; +use thiserror::Error; + +#[derive(Error, Diagnostic, Debug)] +pub enum SemanticError { + #[error(transparent)] + #[diagnostic(code(file_read::io_error))] + StringError(#[from] std::io::Error), + + /// Stack Errors + + #[error("Empty Stack")] + #[diagnostic(code(semantic_error::empty_stack))] + EmptyStack { + #[source_code] + input: String, + + #[help] + advice: String, + }, + + #[error("Invalid Cast")] + #[diagnostic(code(semantic_error::invalid_cast))] + InvalidCast { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "Here"] + extension_src: (usize, usize), + }, + + /// Variable errors + + #[error("Duplicate Variable")] + #[diagnostic(code(semantic_error::duplicate_variable))] + DupplicateVariable { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "First declared here"] + first_extension_src: (usize, usize), + + #[label = "Trying to declare again here"] + extension_src: (usize, usize), + }, + + #[error("Variable not 
declared")] + #[diagnostic(code(semantic_error::variable_not_declared))] + VariableNotDeclared { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "Here"] + extension_src: (usize, usize), + }, + + #[error("Invalid Assignment")] + #[diagnostic(code(semantic_error::variable_type_mismatch))] + VariableTypeMismatch { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "Here"] + extension_src: (usize, usize), + }, + + #[error("Operators type differ")] + #[diagnostic(code(semantic_error::operators_type_differ))] + OperatorsTypeDiffer { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "Here"] + extension_src: (usize, usize), + }, + + #[error("Invalid Operator")] + #[diagnostic(code(semantic_error::invalid_operator))] + InvalidOperator { + #[source_code] + input: String, + + #[help] + advice: String, + + #[label = "Here"] + extension_src: (usize, usize), + }, +} diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 851c0bc..c1b7a94 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1 +1,114 @@ +use crate::{ + lexer::tokens::{ArithmeticOperators, StackOperators, Token}, + parser::parse::AstNode, +}; +use miette::Result as MietteResult; + +use self::{ + errors::SemanticError, + stack_frame::StackFrame, + symbol_table::{SymbolTable, Value}, +}; + pub mod ast; +pub mod errors; +pub mod stack_frame; +pub mod symbol_table; + +pub struct SemanticAnalyzer { + pub symbol_table: SymbolTable, + stack: StackFrame, + source_code: String, +} + +impl SemanticAnalyzer { + pub fn new(source_code: String) -> Self { + Self { + symbol_table: SymbolTable::new(source_code.to_string()), + stack: Default::default(), + source_code, + } + } + + pub fn analyze(&mut self, ast: &AstNode) -> MietteResult<()> { + match ast.token.clone() { + Token::Program => { + for child in ast.children.iter() { + self.analyze(child)?; + } + } + Token::Integer(value) => { + self.stack.values.push(Value::I32(value)); + 
} + Token::Float(value) => self.stack.values.push(Value::F32(value)), + Token::String(value) => self.stack.values.push(Value::String(value)), + Token::Boolean(value) => self.stack.values.push(Value::Bool(value)), + Token::StackOps(operator) => { + match operator { + StackOperators::Dump => self.stack_pop()?, + StackOperators::Dup => self.stack_dup()?, + StackOperators::Drop => self.stack_pop()?, + }; + } + Token::ArithmeticOp(operator) => { + self.analyze(ast.children.get(0).unwrap()).unwrap(); // left side + self.analyze(ast.children.get(1).unwrap()).unwrap(); // right side + + let left = self.stack_pop()?; + let right = self.stack_pop()?; + + match (left, right) { + (Value::I32(left), Value::I32(right)) => { + self.stack.values.push(Value::I32(match operator { + ArithmeticOperators::Plus => left + right, + ArithmeticOperators::Minus => left - right, + ArithmeticOperators::Times => left * right, + ArithmeticOperators::Divide => left / right, + ArithmeticOperators::Modulo => left % right, + })) + } + (Value::F32(left), Value::F32(right)) => { + self.stack.values.push(Value::F32(match operator { + ArithmeticOperators::Plus => left + right, + ArithmeticOperators::Minus => left - right, + ArithmeticOperators::Times => left * right, + ArithmeticOperators::Divide => left / right, + ArithmeticOperators::Modulo => left % right, + })) + } + _ => Err(SemanticError::OperatorsTypeDiffer { + input: self.source_code.clone(), + advice: "You can only add two values of the same type".to_string(), + extension_src: ast.span, + })?, + } + } + _ => panic!("Unsupported Validation!"), + } + + Ok(()) + } + + pub fn stack_dup(&mut self) -> MietteResult { + match self.stack.values.clone().last() { + Some(value) => { + self.stack.values.push(value.clone()); + Ok(value.clone()) + } + None => Err(SemanticError::EmptyStack { + input: self.source_code.clone(), + advice: "You can't dup an empty stack".to_string(), + })?, + } + } + + pub fn stack_pop(&mut self) -> MietteResult { + match 
self.stack.values.pop() { + Some(value) => Ok(value), + None => Err(SemanticError::EmptyStack { + input: self.source_code.clone(), + advice: "You can't pop an empty stack".to_string(), + })?, + } + } +} diff --git a/src/semantic/stack_frame.rs b/src/semantic/stack_frame.rs new file mode 100644 index 0000000..859e65a --- /dev/null +++ b/src/semantic/stack_frame.rs @@ -0,0 +1,8 @@ +use super::symbol_table::Value; + +// The stack frame will actually keep track of the values in the stack +// as the pile is a stack-based language +#[derive(Debug, Clone, PartialEq, Default)] +pub struct StackFrame { + pub values: Vec, +} diff --git a/src/semantic/symbol_table.rs b/src/semantic/symbol_table.rs new file mode 100644 index 0000000..077b86e --- /dev/null +++ b/src/semantic/symbol_table.rs @@ -0,0 +1,333 @@ +use std::{collections::HashMap, fmt::Display}; + +use crate::lexer::tokens::span_to_tuple; + +use super::errors::SemanticError; + +/// The Semantic analysis of the language must keep track of the current +/// stack with the values, types and scopes. +/// As well as a table of symbols and their types. 
+ +#[derive(Debug, Clone, PartialEq)] +pub enum Value { + I32(i32), + I64(i64), + F32(f32), + F64(f64), + Bool(bool), + String(String), +} + +impl From<&str> for Value { + fn from(type_name: &str) -> Self { + match type_name { + "i32" => Value::I32(0), + "i64" => Value::I64(0_i64), + "f32" => Value::F32(0_f32), + "f64" => Value::F64(0_f64), + "bool" => Value::Bool(false), + "string" => Value::String("".to_string()), + _ => Value::I32(0), + } + } +} + +impl From<(&str, &str)> for Value { + fn from((value, type_name): (&str, &str)) -> Self { + match type_name { + "i32" => Value::I32(value.parse().unwrap()), + "i64" => Value::I64(value.parse().unwrap()), + "f32" => Value::F32(value.parse().unwrap()), + "f64" => Value::F64(value.parse().unwrap()), + "bool" => Value::Bool(value.parse().unwrap()), + "string" => Value::String(value.to_string()), + _ => Value::I32(0), + } + } +} + +impl Default for Value { + fn default() -> Self { + Value::I32(0) + } +} + +impl Value { + pub fn compare_type_to(&self, other: &Value) -> bool { + matches!( + (self, other), + (Value::I32(_), Value::I32(_)) + | (Value::I64(_), Value::I64(_)) + | (Value::F32(_), Value::F32(_)) + | (Value::F64(_), Value::F64(_)) + | (Value::Bool(_), Value::Bool(_)) + | (Value::String(_), Value::String(_)) + ) + } + + pub fn get_type(&self) -> String { + match self { + Value::I32(_) => "i32", + Value::I64(_) => "i64", + Value::F32(_) => "f32", + Value::F64(_) => "f64", + Value::Bool(_) => "bool", + Value::String(_) => "string", + } + .to_string() + } + + pub fn cast_to(&self, type_name: &str) -> Result { + match (type_name, self) { + ("i32", Value::I32(value)) => Ok(Value::I32(*value)), + ("i32", Value::F32(value)) => Ok(Value::I32(*value as i32)), + _ => Err(format!( + "Cannot cast value {:?} to type {}", + self, type_name + )), + } + } + + pub fn times(&self, other: &Value) -> Result { + match (self, other) { + (Value::I32(value), Value::I32(other)) => Ok(Value::I32(value * other)), + (Value::I64(value), 
Value::I64(other)) => Ok(Value::I64(value * other)), + (Value::F32(value), Value::F32(other)) => Ok(Value::F32(value * other)), + (Value::F64(value), Value::F64(other)) => Ok(Value::F64(value * other)), + _ => Err(()), + } + } + + pub fn plus(&self, other: &Value) -> Result { + match (self, other) { + (Value::I32(value), Value::I32(other)) => Ok(Value::I32(value + other)), + (Value::I64(value), Value::I64(other)) => Ok(Value::I64(value + other)), + (Value::F32(value), Value::F32(other)) => Ok(Value::F32(value + other)), + (Value::F64(value), Value::F64(other)) => Ok(Value::F64(value + other)), + (Value::String(value), Value::String(other)) => { + Ok(Value::String(format!("{}{}", value, other))) + } + _ => Err(()), + } + } + + pub fn minus(&self, other: &Value) -> Result { + match (self, other) { + (Value::I32(value), Value::I32(other)) => Ok(Value::I32(value - other)), + (Value::I64(value), Value::I64(other)) => Ok(Value::I64(value - other)), + (Value::F32(value), Value::F32(other)) => Ok(Value::F32(value - other)), + (Value::F64(value), Value::F64(other)) => Ok(Value::F64(value - other)), + _ => Err(()), + } + } + + pub fn get_value(&self) -> String { + match self { + Value::I32(value) => value.to_string(), + Value::I64(value) => value.to_string(), + Value::F32(value) => value.to_string(), + Value::F64(value) => value.to_string(), + Value::Bool(value) => value.to_string(), + Value::String(value) => value.to_string(), + } + } +} + +/// The scope is an enum that can be either a global scope or a local scope +/// The local scope is a vector of strings that contains the names of the +/// variables in the scope +#[derive(Default, Debug, Clone, PartialEq)] +pub enum Scope { + #[default] + Global, + Local(usize), +} + +/// Symbol information +/// The symbol information is stored in a struct +/// The symbol name, the type, the scope, the value and the position +#[derive(Default, Debug, Clone, PartialEq)] +pub struct Symbol { + pub name: String, + pub scope: Scope, + pub 
value: Value, + pub position: (u32, u32), +} + +/// Symbol table +/// Use a hash map to store the symbols and their informations +/// The key is the symbol name and the value is the symbol information +#[derive(Default, Debug)] +pub struct SymbolTable { + pub symbols: HashMap<(String, usize), Symbol>, + pub current_scope: usize, + pub source: String, +} + +impl From for Scope { + fn from(scope: usize) -> Self { + match scope { + 0 => Scope::Global, + _ => Scope::Local(scope), + } + } +} + +impl Display for SymbolTable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (key, value) in &self.symbols { + writeln!( + f, + "Symbol: {:?} - {:?} - {:?} - {:?} - {:?}", + key, value.name, value.scope, value.value, value.position + )?; + } + Ok(()) + } +} + +impl SymbolTable { + /// Create a new symbol table + pub fn new(source: String) -> Self { + SymbolTable { + symbols: HashMap::new(), + current_scope: 0, + source, + } + } + + pub fn enter_scope(&mut self) { + self.current_scope += 1; + } + + pub fn exit_scope(&mut self) { + self.current_scope -= 1; + } + + pub fn define( + &mut self, + name: &str, + value: Value, + (row, col): (usize, usize), + ) -> Result, SemanticError> { + if let Some(symbol) = self.symbols.get(&(name.to_string(), self.current_scope)) { + return Err(SemanticError::DupplicateVariable { + input: self.source.clone(), + advice: "This variable was already declared!".to_string(), + extension_src: span_to_tuple(row..col), + first_extension_src: span_to_tuple(symbol.position.0 as usize..symbol.position.1 as usize), + }); + } + + Ok(self.symbols.insert( + (name.to_string(), self.current_scope), + Symbol { + name: name.to_string(), + scope: self.current_scope.into(), + value, + position: (row as u32, col as u32), + }, + )) + } + + fn symbol_at_scope(&self, name: &str, scope: usize) -> Option { + self.symbols.get(&(name.to_string(), scope)).cloned() + } + + pub fn lookup(&self, name: &str) -> Option { + for scope in 
(0..=self.current_scope).rev() { + match self.symbol_at_scope(name, scope) { + Some(symbol) => return Some(symbol), + None => continue, + } + } + None + } + + pub fn update_variable(&mut self, name: &str, value: Value) -> Result<(), String> { + self + .symbols + .get_mut(&(name.to_string(), self.current_scope)) + .map(|symbol| { + symbol.value = value; + Ok(()) + }) + .unwrap_or(Err(format!("Variable {} is not yet declared", name))) + } +} + +#[cfg(test)] +mod semantic_tests { + use super::*; + + #[test] + fn test_global_scope() -> Result<(), Box> { + let mut symbol_table = SymbolTable::new(String::from("a")); + + symbol_table.define("a", Value::I32(1), (0, 0)); + + let var_a = symbol_table.lookup("a").expect("Variable a not found"); + + assert_eq!(var_a.scope, Scope::Global); + + Ok(()) + } + + #[test] + fn test_local_scope() -> Result<(), Box> { + let mut symbol_table = SymbolTable::new(String::from("a b")); + + symbol_table.define("a", Value::I32(1), (0, 0)); + symbol_table.enter_scope(); + symbol_table.define("b", Value::I32(2), (2, 0)); + + let var_a = symbol_table.lookup("a").expect("Variable a not found"); + let var_b = symbol_table.lookup("b").expect("Variable b not found"); + + assert_eq!(var_a.scope, Scope::Global); + assert_eq!(var_b.scope, Scope::Local(1)); + + Ok(()) + } + + #[test] + fn test_local_scope_exit() -> Result<(), Box> { + let mut symbol_table = SymbolTable::new(String::from("a b")); + + symbol_table.define("a", Value::I32(1), (0, 0)); + symbol_table.enter_scope(); + symbol_table.define("b", Value::I32(2), (2, 0)); + symbol_table.exit_scope(); + + let var_a = symbol_table.lookup("a").expect("Variable a not found"); + let var_b = symbol_table.lookup("b"); + + assert_eq!(var_a.scope, Scope::Global); + assert_eq!(var_b, None); + + Ok(()) + } + + #[test] + fn test_local_preference() -> Result<(), Box> { + let mut symbol_table = SymbolTable::new(String::from("a a")); + + symbol_table.define("a", Value::I32(1), (0, 0)); + 
symbol_table.enter_scope(); + symbol_table.define("a", Value::I32(2), (2, 0)); + + let var_a = symbol_table.lookup("a").expect("Variable a not found"); + + assert_eq!(var_a.scope, Scope::Local(1)); + assert_eq!(var_a.value, Value::I32(2)); + + symbol_table.exit_scope(); + + let var_a = symbol_table.lookup("a").expect("Variable a not found"); + + assert_eq!(var_a.scope, Scope::Global); + assert_eq!(var_a.value, Value::I32(1)); + + Ok(()) + } +}