Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RusticChimp - an interpreter for Monkey programming language #2

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions rustic-chimp/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[package]
name = "rustic-chimp"
version = "0.1.0"
edition = "2021"

[dependencies]
3 changes: 3 additions & 0 deletions rustic-chimp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# RusticChimp

An interpreter for the [Monkey programming language](https://monkeylang.org/) in Rust.
235 changes: 235 additions & 0 deletions rustic-chimp/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
use crate::token::{lookup_ident, Token};

/// A lexer that turns Monkey source text into a stream of [`Token`]s.
///
/// The lexer keeps a one-character window (`ch`) over the input and tracks
/// its location with *byte* offsets, so slicing the input is always done on
/// valid UTF-8 character boundaries, even when the source contains
/// non-ASCII characters.
pub struct Lexer<'a> {
    /// input source
    input: &'a str,

    /// byte offset in `input` of the current char
    position: usize,

    /// byte offset in `input` of the next char to read (just past the current char)
    read_position: usize,

    /// current char under examination (`'\0'` once the input is exhausted)
    ch: char,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` and loads the first character.
    pub fn new(input: &'a str) -> Self {
        let mut lex = Lexer {
            input,
            position: 0,
            read_position: 0,
            ch: '\0',
        };

        lex.read_char();
        lex
    }

    /// Advances the window by one character.
    ///
    /// After the call `ch` holds the next character of the input, or `'\0'`
    /// when the input is exhausted.
    pub fn read_char(&mut self) {
        // `read_position` always sits on a char boundary, so this slice cannot panic.
        let next = self.input[self.read_position..].chars().next();

        self.position = self.read_position;
        self.ch = next.unwrap_or('\0');
        // Advance by the encoded width of the character; stay put at end of
        // input so the offsets never run past `input.len()`.
        self.read_position += next.map_or(0, char::len_utf8);
    }

    /// Returns the character after the current one without consuming it,
    /// or `'\0'` when there is none.
    fn peek_char(&self) -> char {
        self.input[self.read_position..]
            .chars()
            .next()
            .unwrap_or('\0')
    }

    /// Skips leading whitespace and returns the next token.
    ///
    /// Returns [`Token::Eof`] once the input is exhausted and
    /// [`Token::Illegal`] for any character that does not start a token.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();

        let tok = match self.ch {
            // Two-character operators: consume the first char here, the
            // shared `read_char` below consumes the second.
            '=' if self.peek_char() == '=' => {
                self.read_char();
                Token::Eq
            }
            '=' => Token::Assign,
            '!' if self.peek_char() == '=' => {
                self.read_char();
                Token::NotEq
            }
            '!' => Token::Bang,

            '+' => Token::Plus,
            '-' => Token::Minus,
            '*' => Token::Asterisk,
            '/' => Token::Slash,
            '<' => Token::Lt,
            '>' => Token::Gt,
            ',' => Token::Comma,
            ';' => Token::Semicolon,
            '(' => Token::Lparen,
            ')' => Token::Rparen,
            '{' => Token::Lcurly,
            '}' => Token::Rcurly,
            '\0' => Token::Eof,

            // Identifiers and numbers consume all of their own characters,
            // so return directly instead of falling through to `read_char`.
            ch if is_letter(ch) => return self.read_identifier(),
            ch if is_digit(ch) => return self.read_number(),
            _ => Token::Illegal,
        };

        self.read_char();
        tok
    }

    /// Consumes a run of letter characters and maps it to a keyword or
    /// identifier token via `lookup_ident`.
    fn read_identifier(&mut self) -> Token {
        let start = self.position;
        while is_letter(self.ch) {
            self.read_char();
        }

        lookup_ident(&self.input[start..self.position])
    }

    /// Consumes a run of digits and returns it as [`Token::Int`].
    ///
    /// A literal that does not fit in the integer type carried by
    /// `Token::Int` yields [`Token::Illegal`] instead of panicking.
    fn read_number(&mut self) -> Token {
        let start = self.position;
        while is_digit(self.ch) {
            self.read_char();
        }

        self.input[start..self.position]
            .parse()
            .map_or(Token::Illegal, Token::Int)
    }

    /// Advances past spaces, tabs, newlines and carriage returns.
    fn skip_whitespace(&mut self) {
        while matches!(self.ch, ' ' | '\t' | '\n' | '\r') {
            self.read_char();
        }
    }
}

/// Returns `true` when `ch` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(ch: char) -> bool {
    matches!(ch, '0'..='9')
}

/// Returns `true` when `ch` may appear in an identifier: an ASCII letter
/// (either case) or an underscore.
fn is_letter(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an identifier token.
    fn ident(name: &str) -> Token {
        Token::Ident(name.to_string())
    }

    /// Lexes a program that exercises every token kind and checks the
    /// resulting token sequence, including the trailing `Eof`.
    #[test]
    fn test_next_token() {
        let source = "let five = 5;
let ten = 10;
let add = fn(x, y) {
x + y;
};

let result = add(five, ten);
!-/*5;
5 < 10 > 5;

if (5 < 10) {
return true;
} else {
return false;
}

10 == 10;
10 != 9;
";

        let expected = [
            // let five = 5;
            Token::Let,
            ident("five"),
            Token::Assign,
            Token::Int(5),
            Token::Semicolon,
            // let ten = 10;
            Token::Let,
            ident("ten"),
            Token::Assign,
            Token::Int(10),
            Token::Semicolon,
            // let add = fn(x, y) {
            Token::Let,
            ident("add"),
            Token::Assign,
            Token::Function,
            Token::Lparen,
            ident("x"),
            Token::Comma,
            ident("y"),
            Token::Rparen,
            Token::Lcurly,
            // x + y;
            ident("x"),
            Token::Plus,
            ident("y"),
            Token::Semicolon,
            // };
            Token::Rcurly,
            Token::Semicolon,
            // let result = add(five, ten);
            Token::Let,
            ident("result"),
            Token::Assign,
            ident("add"),
            Token::Lparen,
            ident("five"),
            Token::Comma,
            ident("ten"),
            Token::Rparen,
            Token::Semicolon,
            // !-/*5;
            Token::Bang,
            Token::Minus,
            Token::Slash,
            Token::Asterisk,
            Token::Int(5),
            Token::Semicolon,
            // 5 < 10 > 5;
            Token::Int(5),
            Token::Lt,
            Token::Int(10),
            Token::Gt,
            Token::Int(5),
            Token::Semicolon,
            // if (5 < 10) {
            Token::If,
            Token::Lparen,
            Token::Int(5),
            Token::Lt,
            Token::Int(10),
            Token::Rparen,
            Token::Lcurly,
            // return true;
            Token::Return,
            Token::True,
            Token::Semicolon,
            // } else {
            Token::Rcurly,
            Token::Else,
            Token::Lcurly,
            // return false;
            Token::Return,
            Token::False,
            Token::Semicolon,
            // }
            Token::Rcurly,
            // 10 == 10;
            Token::Int(10),
            Token::Eq,
            Token::Int(10),
            Token::Semicolon,
            // 10 != 9;
            Token::Int(10),
            Token::NotEq,
            Token::Int(9),
            Token::Semicolon,
            // end of input
            Token::Eof,
        ];

        let mut lexer = Lexer::new(source);
        for want in expected {
            let got = lexer.next_token();
            assert_eq!(want, got);
        }
    }
}
3 changes: 3 additions & 0 deletions rustic-chimp/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/// Lexer that splits source text into tokens.
pub mod lexer;
/// Interactive read-lex-print loop.
pub mod repl;
/// Token type and keyword lookup.
pub mod token;
10 changes: 10 additions & 0 deletions rustic-chimp/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use std::io;

use rustic_chimp::repl;

/// Program entry point: prints the greeting banner, then runs the REPL on
/// the process's standard input and output.
fn main() {
    let banner = [
        "Hello! This is the Monkey programming language!",
        "Feel free to type in commands",
    ];
    for banner_line in banner {
        println!("{}", banner_line);
    }

    let (stdin, stdout) = (io::stdin(), io::stdout());
    repl::start(stdin, stdout);
}
31 changes: 31 additions & 0 deletions rustic-chimp/src/repl/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use std::io::{self, BufRead, Read, Write};

use crate::{lexer::Lexer, token::Token};

/// Text written before each line of input is read.
const PROMPT: &str = ">> ";

/// Runs the interactive loop: prompt, read one line, print its tokens.
///
/// Each iteration writes `PROMPT` to `output`, reads a line from `input`,
/// lexes it, and writes every token (in `Debug` form, one per line) to
/// `output`.
///
/// The loop ends, and the function returns, when `input` reaches end of
/// file, when reading fails, or when writing to `output` fails.
pub fn start<R, W>(input: R, output: W)
where
    R: Read,
    W: Write,
{
    let mut reader = io::BufReader::new(input);
    let mut writer = output;
    // One buffer reused for every line instead of a fresh allocation per prompt.
    let mut line = String::new();

    loop {
        if write!(writer, "{}", PROMPT).is_err() || writer.flush().is_err() {
            return;
        }

        line.clear();
        match reader.read_line(&mut line) {
            // `Ok(0)` means end of input; without this check the loop would
            // print prompts forever once the input is closed.
            Ok(0) | Err(_) => return,
            Ok(_) => {}
        }

        let mut lex = Lexer::new(&line);
        loop {
            let tok = lex.next_token();
            if tok == Token::Eof {
                break;
            }
            // Write to the caller-supplied sink rather than the process stdout.
            if writeln!(writer, "{:?}", tok).is_err() {
                return;
            }
        }
    }
}
56 changes: 56 additions & 0 deletions rustic-chimp/src/token/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/// A lexical token of the Monkey language.
///
/// Identifier and integer tokens carry their value; every other token is
/// fully described by its variant.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Token {
    /// A character that does not start any known token.
    Illegal,
    /// End of input.
    Eof,

    // Identifiers + literals
    /// An identifier, carrying its name.
    Ident(String),
    /// An integer literal, carrying its value.
    Int(i64),

    // Operators
    Assign,   // =
    Plus,     // +
    Minus,    // -
    Bang,     // !
    Asterisk, // *
    Slash,    // /

    // Comparisons
    Lt,    // <
    Gt,    // >
    Eq,    // ==
    NotEq, // !=

    // Delimiters
    Comma,     // ,
    Semicolon, // ;
    Lparen,    // (
    Rparen,    // )
    Lcurly,    // {
    Rcurly,    // }

    // Keywords
    Function, // fn
    Let,      // let
    True,     // true
    False,    // false
    If,       // if
    Else,     // else
    Return,   // return
}

/// Classifies a word read by the lexer.
///
/// Returns the matching keyword token when `ident` is a reserved word,
/// otherwise a `Token::Ident` holding a copy of `ident`.
pub fn lookup_ident(ident: &str) -> Token {
    let keyword = match ident {
        "fn" => Some(Token::Function),
        "let" => Some(Token::Let),
        "true" => Some(Token::True),
        "false" => Some(Token::False),
        "if" => Some(Token::If),
        "else" => Some(Token::Else),
        "return" => Some(Token::Return),
        _ => None,
    };

    // Anything that is not a reserved word is a user-defined identifier.
    keyword.unwrap_or_else(|| Token::Ident(ident.to_owned()))
}