diff --git a/rustic-chimp/Cargo.toml b/rustic-chimp/Cargo.toml new file mode 100644 index 0000000..57a671c --- /dev/null +++ b/rustic-chimp/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "rustic-chimp" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/rustic-chimp/README.md b/rustic-chimp/README.md new file mode 100644 index 0000000..870ac13 --- /dev/null +++ b/rustic-chimp/README.md @@ -0,0 +1,3 @@ +# RusticChimp + +An interpreter for the [Monkey programming language](https://monkeylang.org/) in Rust. diff --git a/rustic-chimp/src/lexer/mod.rs b/rustic-chimp/src/lexer/mod.rs new file mode 100644 index 0000000..2f0a8d2 --- /dev/null +++ b/rustic-chimp/src/lexer/mod.rs @@ -0,0 +1,235 @@ +use crate::token::{lookup_ident, Token}; + +pub struct Lexer<'a> { + /// input source + input: &'a str, + + /// current position in input (points to current char) + position: usize, + + /// current reading position in input (after current char) + read_position: usize, + + /// current char under examination + ch: char, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + let mut lex = Lexer { + input, + position: 0, + read_position: 0, + ch: '\0', + }; + + lex.read_char(); + lex + } + + pub fn read_char(&mut self) { + self.ch = self.peek_char(); + self.position = self.read_position; + self.read_position += 1; + } + + fn peek_char(&self) -> char { + if self.read_position >= self.input.len() { + return '\0'; + } + + self.input.chars().nth(self.read_position).unwrap() + } + + pub fn next_token(&mut self) -> Token { + self.skip_whitespace(); + + let tok = match self.ch { + '=' => { + if self.peek_char() == '=' { + self.read_char(); + Token::Eq + } else { + Token::Assign + } + } + '!' => { + if self.peek_char() == '=' { + self.read_char(); + Token::NotEq + } else { + Token::Bang + } + } + + '+' => Token::Plus, + '-' => Token::Minus, + '*' => Token::Asterisk, + '/' => Token::Slash, + '<' => Token::Lt, + '>' => Token::Gt, + ',' => Token::Comma, + ';' => Token::Semicolon, + '(' => Token::Lparen, + ')' => Token::Rparen, + '{' => Token::Lcurly, + '}' => Token::Rcurly, + '\0' => Token::Eof, + _ => { + if is_letter(self.ch) { + return self.read_identifier(); + } + if is_digit(self.ch) { + return self.read_number(); + } + Token::Illegal + } + }; + + self.read_char(); + tok + } + + fn read_identifier(&mut self) -> Token { + let pos = self.position; + while is_letter(self.ch) { + self.read_char(); + } + + lookup_ident(&self.input[pos..self.position]) + } + + fn read_number(&mut self) -> Token { + let pos = self.position; + while is_digit(self.ch) { + self.read_char(); + } + + Token::Int(self.input[pos..self.position].parse().unwrap()) + } + + fn skip_whitespace(&mut self) { + while self.ch == ' ' || self.ch == '\t' || self.ch == '\n' || self.ch == '\r' { + self.read_char() + } + } +} + +fn is_digit(ch: char) -> bool { + ch.is_ascii_digit() +} + +fn is_letter(ch: char) -> bool { + ch.is_ascii_alphabetic() || ch == '_' +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_next_token() { + let input = "let five = 5; +let ten = 10; +let add = fn(x, y) { + x + y; +}; + +let result = add(five, ten); +!-/*5; +5 < 10 > 5; + +if (5 < 10) { + return true; +} else { + return false; +} + +10 == 10; +10 != 9; +"; + + let tests = vec![ + Token::Let, + Token::Ident("five".to_string()), + Token::Assign, + Token::Int(5), + Token::Semicolon, + Token::Let, + Token::Ident("ten".to_string()), + Token::Assign, + Token::Int(10), + Token::Semicolon, + Token::Let, + Token::Ident("add".to_string()), + Token::Assign, + Token::Function, + Token::Lparen, + Token::Ident("x".to_string()), + Token::Comma, + Token::Ident("y".to_string()), + Token::Rparen, + Token::Lcurly, + Token::Ident("x".to_string()), + Token::Plus, + Token::Ident("y".to_string()), + Token::Semicolon, + Token::Rcurly, + Token::Semicolon, + Token::Let, + Token::Ident("result".to_string()), + Token::Assign, + Token::Ident("add".to_string()), + Token::Lparen, + Token::Ident("five".to_string()), + Token::Comma, + Token::Ident("ten".to_string()), + Token::Rparen, + Token::Semicolon, + Token::Bang, + Token::Minus, + Token::Slash, + Token::Asterisk, + Token::Int(5), + Token::Semicolon, + Token::Int(5), + Token::Lt, + Token::Int(10), + Token::Gt, + Token::Int(5), + Token::Semicolon, + Token::If, + Token::Lparen, + Token::Int(5), + Token::Lt, + Token::Int(10), + Token::Rparen, + Token::Lcurly, + Token::Return, + Token::True, + Token::Semicolon, + Token::Rcurly, + Token::Else, + Token::Lcurly, + Token::Return, + Token::False, + Token::Semicolon, + Token::Rcurly, + Token::Int(10), + Token::Eq, + Token::Int(10), + Token::Semicolon, + Token::Int(10), + Token::NotEq, + Token::Int(9), + Token::Semicolon, + Token::Eof, + ]; + + let mut lex = Lexer::new(input); + for expected_token in tests { + let tok = lex.next_token(); + assert_eq!(expected_token, tok); + } + } +} diff --git a/rustic-chimp/src/lib.rs b/rustic-chimp/src/lib.rs new file mode 100644 index 0000000..808f21d --- /dev/null +++ b/rustic-chimp/src/lib.rs @@ -0,0 +1,3 @@ +pub mod lexer; +pub mod repl; +pub mod token; diff --git a/rustic-chimp/src/main.rs b/rustic-chimp/src/main.rs new file mode 100644 index 0000000..c77f3a6 --- /dev/null +++ b/rustic-chimp/src/main.rs @@ -0,0 +1,10 @@ +use std::io; + +use rustic_chimp::repl; + +fn main() { + println!("Hello! This is the Monkey programming language!"); + println!("Feel free to type in commands"); + + repl::start(io::stdin(), io::stdout()); +} diff --git a/rustic-chimp/src/repl/mod.rs b/rustic-chimp/src/repl/mod.rs new file mode 100644 index 0000000..0400c11 --- /dev/null +++ b/rustic-chimp/src/repl/mod.rs @@ -0,0 +1,31 @@ +use std::io::{self, BufRead, Read, Write}; + +use crate::{lexer::Lexer, token::Token}; + +const PROMPT: &str = ">> "; + +pub fn start(input: R, output: W) +where + R: Read, + W: Write, +{ + let mut reader = io::BufReader::new(input); + let mut writer = output; + + loop { + write!(writer, "{}", PROMPT).unwrap(); + writer.flush().unwrap(); + + let mut line = String::new(); + if reader.read_line(&mut line).is_err() { + return; + } + + let mut lex = Lexer::new(&line); + let mut tok = lex.next_token(); + while tok != Token::Eof { + println!("{:?}", tok); + tok = lex.next_token(); + } + } +} diff --git a/rustic-chimp/src/token/mod.rs b/rustic-chimp/src/token/mod.rs new file mode 100644 index 0000000..f6e982d --- /dev/null +++ b/rustic-chimp/src/token/mod.rs @@ -0,0 +1,56 @@ +#[derive(Debug, Eq, PartialEq)] +pub enum Token { + Illegal, + Eof, + + // Identifiers + literals + Ident(String), + Int(i64), + + // Operators + Assign, // = + Plus, // + + Minus, // - + Bang, // ! + Asterisk, // * + Slash, // / + + // Comparisions + Lt, // < + Gt, // > + Eq, // == + NotEq, // != + + // Delimeters + Comma, // , + Semicolon, // ; + Lparen, // ( + Rparen, // ) + Lcurly, // { + Rcurly, // } + + // Keywords + Function, // fn + Let, // let + True, // true + False, // false + If, // if + Else, // else + Return, // return +} + +pub fn lookup_ident(ident: &str) -> Token { + match ident { + // keywords + "fn" => Token::Function, + "let" => Token::Let, + "true" => Token::True, + "false" => Token::False, + "if" => Token::If, + "else" => Token::Else, + "return" => Token::Return, + + // identifier + _ => Token::Ident(ident.to_string()), + } +}