diff --git a/didasm/.gitignore b/didasm/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/didasm/.gitignore @@ -0,0 +1 @@ +/target diff --git a/didasm/Cargo.lock b/didasm/Cargo.lock new file mode 100644 index 0000000..a3f6397 --- /dev/null +++ b/didasm/Cargo.lock @@ -0,0 +1,136 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "bitfield" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f798d2d157e547aa99aab0967df39edd0b70307312b6f8bd2848e6abe40896e0" + +[[package]] +name = "didasm" +version = "0.1.0" +dependencies = [ + "bitfield", + "once_cell", + "regex", + "strum", + "strum_macros", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" diff --git a/didasm/Cargo.toml b/didasm/Cargo.toml new file mode 100644 index 0000000..3b059ae --- /dev/null +++ b/didasm/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "didasm" +version = "0.1.0" +edition = "2021" + +[dependencies] +bitfield = "0.17.0" +once_cell = "1.20.2" +regex = "1.11.1" +strum = "0.26.3" +strum_macros = "0.26.4" diff --git a/didasm/src/main.rs b/didasm/src/main.rs new file mode 100644 index 0000000..2f4ebab --- /dev/null +++ b/didasm/src/main.rs @@ -0,0 +1,400 @@ +use bitfield::{bitfield, BitRange, BitRangeMut}; +use regex::Regex; +#[allow(unused_imports)] +use std::collections::{HashMap, HashSet}; +use std::fmt::Display; + + + +mod tokens; +use tokens::*; +use std::str::FromStr; +use Mnemonic::*; +use Operand::*; + +bitfield! { + /// While some fields might be renamed, they might still + /// be useful for understanding what exactly you are filling out + struct Instruction(u16); + no default BitRange; + impl Debug; + + eff_address, set_eff_address: 0,0; + two_addresses, set_if_2_addrs: 1,1; + is_imm, set_if_imm: 2,2; + does_save, set_if_saves: 3,3; + is_operation, set_if_operation: 3,3; + in_type, set_in_type: 0,3; + opcode, set_op: 4,6; + opcode_jcond, set_op_jcond:4,7; + d, set_dir: 7,7; + modifier, set_mod: 8,9; + reg, set_reg: 10,12; + rm, set_rm: 13,15; + port, set_port: 8,15; + pc_depls, set_pc_depls: 8,15; +} + +impl BitRange for Instruction { + fn bit_range(&self, msb: usize, lsb: usize) -> u16 { + let width = msb.abs_diff(lsb) + 1; + let mask = (1 << width) - 1; + ((self.0 >> lsb) & mask) as u16 + } +} +impl BitRangeMut for Instruction { + fn set_bit_range(&mut self, msb: usize, lsb: usize, value: u16) { + let w = msb.abs_diff(lsb) + 1; + let mn = if msb > lsb {lsb} else {msb}; + let _mx = if msb > lsb {msb} else {lsb}; + let value = if msb > lsb {value} else {value.reverse_bits() >> (16 - w)}; + let mask = ((1 << w) - 1) << mn; + let mask = !mask; + // 
println!("Bitmask: {mask:016b}, {value:0width$b}", width=w); + self.0 = self.0 & mask; + self.0 += (value as u16) << mn; + } +} + +// impl BitRange for Instruction { +// fn bit_range(&self, msb: usize, lsb: usize) -> u8 { +// let width = msb - lsb + 1; +// let mask = (1 << width) - 1; +// ((self.0 >> lsb) & mask) as u8 +// } +// } +// impl BitRangeMut for Instruction { +// fn set_bit_range(&mut self, msb: usize, lsb: usize, value: u8) { +// self.0 = (value as u16) << lsb; +// print_bits(&self); +// } +// } + +#[derive(Debug)] +struct Statement { + lineno: usize, + line: String, + mnemonic: String, + op1: Option, + op2: Option +} + +#[derive(Debug)] +struct ParsedStatement { + mnemonic: Mnemonic, + op1: Option, + op2: Option +} + +#[derive(Debug)] +struct FullInstruction { + i: Instruction, + depls: Option, + imm: Option +} + +#[allow(dead_code)] +fn print_bits(instr: &Instruction) { + print!("Bits: "); + let x = instr.0; + for i in 0..16 { + print!("{}", (x >> i) & 1); + } + println!(); +} + +impl Display for FullInstruction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let x = self.i.0; + write!(f, "{:016b}", x)?; + if let Some(x) = self.depls { + write!(f, " {:016b}", x)?; + } + if let Some(x) = self.imm { + write!(f, " {:016b}", x)?; + } + + Ok(()) + } +} + +impl ParsedStatement { + fn from_statement(s: Statement) -> Result { + Ok(ParsedStatement { + mnemonic: Mnemonic::from_str(&s.mnemonic).map_err(|_| format!("'{}' is not a supported mnemonic", &s.mnemonic))?, + op1: s.op1.map(|op| Operand::from_str(&op)).transpose()?, + op2: s.op2.map(|op| Operand::from_str(&op)).transpose()? + }) + } + + fn convert_to_instruction(self) -> Result { + let ParsedStatement { mnemonic, op1, op2, .. 
} = self; + let mut opcode: Instruction = Instruction(0); + opcode.set_op((&mnemonic).into()); + + let imm = match (op1.clone(), op2.clone()) { + // Since the first operand might be a port or relative jump, we just ignore that Imm is source operand + // We check for this possibility later + (Some(Imm(i)), _) | + (_, Some(Imm(i))) + => Some(i), + _ => None + }; + + match &mnemonic { + In|Out|Pushf|Popf|Ret|Iret|Hlt => { + opcode.set_in_type(0b1000); + }, + m @ (Jbe|Jb|Jc|Jle|Jl|Je|Jz|Jo|Js|Jpe|Ja|Jae|Jnc| + Jg|Jge|Jne|Jnz|Jno|Jns|Jpo) => { + opcode.set_in_type(0b1001); + opcode.set_op_jcond(m.into()); + }, + Push|Pop|Call|Jmp => { + opcode.set_in_type(0b0000); + }, + Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal => { + opcode.set_in_type(0b0001); + }, + Cmp|Test => { + opcode.set_in_type(0b0100); + opcode.set_if_imm(imm.is_some() as u16); + }, + Mov => { + opcode.set_in_type(0b0000); + // print_bits(&opcode); + opcode.set_if_imm(imm.is_some() as u16); + // print_bits(&opcode); + + }, + Add|Adc|Sub|Sbb|And|Or|Xor => { + opcode.set_in_type(0b0101); + opcode.set_if_imm(imm.is_some() as u16); + } + } + let (d, reg) = match (op1.clone(), op2.clone()) { + (Some(Reg(r)), _) => (true, Some(r as usize)), + (_, Some(Reg(r))) => (false, Some(r as usize)), + _ => (false, None) + }; + + let (m, rm, depls) = match (op1, op2) { + (Some(Reg(_)), Some(Reg(ri))) + => (0b11, Some(ri.into()), None), + (Some(Imm(_)), Some(Imm(_))) => {return Err(format!("Operations between 2 immediate values is not allowed!"));}, + (Some(Imm(_)), Some(_)) => {return Err(format!("Immediate values are not allowed to be the source operand! 
Consider swapping the operands"));}, + (Some(RegIndirect(_)|RegSum{..}|RegSumAutodecrement{..}|RegSumAutoincrement{..}|Based{..}|Indexed{..}|BasedIndexed{..}|Direct(_)|Indirect(_)), + Some(RegIndirect(_)|RegSum{..}|RegSumAutodecrement{..}|RegSumAutoincrement{..}|Based{..}|Indexed{..}|BasedIndexed{..}|Direct(_)|Indirect(_))) + => {return Err(format!("You are not allowed to have an operation between 2 operands that use memory!"));} + (Some(RegIndirect(ri)), _) | + (_, Some(RegIndirect(ri))) + => (0b00, Some(ri.into()), None), + (Some(RegSum { b, x }), _) | + (_, Some(RegSum { b, x })) + => (0b00, Some(((x as usize) & 0b1) + ((b as usize)&0b10)), None), + (Some(Indexed { x: ri, depls }|Based { b: ri, depls }), _) | + (_, Some(Indexed { x: ri, depls }|Based { b: ri, depls })) + => (0b10, Some(ri.into()), Some(depls)), + (Some(BasedIndexed { b, x, depls }), _) | + (_, Some(BasedIndexed { b, x, depls })) + => (0b10, Some(((x as usize)&0b1) + ((b as usize)&0b10)), Some(depls)), + (Some(RegSumAutoincrement { b, x }), _) | + (_, Some(RegSumAutoincrement { b, x })) + => (0b01, Some(((x as usize)&0b1) + ((b as usize)&0b10)), None), + (Some(RegSumAutodecrement{b, ..}), _) | + (_, Some(RegSumAutodecrement { b, ..})) + => (0b01, Some(0b100 + ((b as usize >> 1)&1)), None), + (Some(Direct(depls)), _) | + (_, Some(Direct(depls))) + => (0b01, Some(0b110), Some(depls)), + (Some(Indirect(depls)), _) | + (_, Some(Indirect(depls))) + => (0b01, Some(0b111), Some(depls)), + // Cases that should have been placed in _ but explicitly stated + // to utilize the pattern matching mechanism of rust for + // proving completeness on this match + (None|Some(Reg(_)|Imm(_)), None|Some(Reg(_)|Imm(_))) + => (0, None, None) + }; + let depls = depls.map(|e| match e { + Expr::Id(_) => unimplemented!("Identifiers are work in progress"), + Expr::Int(i) => i + }); + + let imm = imm.map(|e| match e { + Expr::Id(_) => unimplemented!("Identifiers are work in progress"), + Expr::Int(i) => i + }); + + match 
(mnemonic, reg, rm, depls, imm) { + (In|Out, None, None, None, Some(x)) => { + let x: u8 = x.try_into().map_err(|_| format!("Cannot convert '{x}' into an 8 bit unsigned integer"))?; + opcode.set_port(x.into()); + Ok(FullInstruction { + i: opcode, + depls: None, + imm: None + }) + }, + (i@(In|Out),_,_,_,_) + => Err(format!("{i} only takes an immediate operand!")), + (Pushf|Popf|Ret|Iret|Hlt, None, None, None, None) => Ok(FullInstruction { + i: opcode, + depls: None, + imm: None + }), + (i@(Pushf|Popf|Ret|Iret|Hlt),_,_,_,_) + => Err(format!("{i} does not take any operands!")), + (Jbe|Jb|Jc|Jle|Jl|Je|Jz|Jo|Js|Jpe|Ja|Jae|Jnc| + Jg|Jge|Jne|Jnz|Jno|Jns|Jpo, None, None, None, Some(x)) => { + let x: u8 = x.try_into().map_err(|_| format!("Cannot convert '{x}' into an 8 bit unsigned integer"))?; + opcode.set_port(x.into()); + Ok(FullInstruction { i: opcode, depls: None, imm: None }) + }, + (i@(Jbe|Jb|Jc|Jle|Jl|Je|Jz|Jo|Js|Jpe|Ja|Jae|Jnc| + Jg|Jge|Jne|Jnz|Jno|Jns|Jpo),_,_,_,_) + => Err(format!("{i} only takes an immediate operand!")), + (Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp, Some(reg), None, None, None) => { + opcode.set_reg(reg.try_into().unwrap()); + opcode.set_dir(d as u16); + Ok(FullInstruction { i: opcode, depls: None, imm: None }) + }, + (Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp, None, Some(rm), x, None) => { + opcode.set_rm(rm.try_into().unwrap()); + opcode.set_dir(d as u16); + opcode.set_mod(m); + let x: Option = x.map(|x| x.try_into().map_err(|_| + format!("{x} cannot be converted to unsigned 16 bit"))).transpose()?; + Ok(FullInstruction { + i: opcode, + depls: x, + imm: None + }) + }, + (i@(Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp), _,_,_,Some(_)) + => Err(format!("{i} does not support immediate values!")), + (i@(Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp),Some(_),Some(_),_,None)| + (i@(Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp),None,None,None,None) + => Err(format!("{i} only supports one operand!")), + 
(Inc|Dec|Neg|Not|Shl|Shr|Sar|Sal|Pop|Push|Call|Jmp,_,None,Some(_),_) + => Err(format!("We should never reach this point. Contact the developer!")), + (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor, Some(reg), Some(rm), x, y) => { + opcode.set_dir(d as u16); + opcode.set_mod(m); + opcode.set_reg(reg.try_into().unwrap()); + opcode.set_rm(rm.try_into().unwrap()); + let x: Option = x.map(|x| x.try_into().map_err(|_| + format!("{x} cannot be converted to unsigned 16 bit"))).transpose()?; + let y: Option = y.map(|x| x.try_into().map_err(|_| + format!("{x} cannot be converted to unsigned 16 bit"))).transpose()?; + + Ok(FullInstruction { i: opcode, depls: x, imm: y }) + }, + (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor, None, Some(rm), x, Some(imm)) => { + opcode.set_dir(d as u16); + opcode.set_mod(m); + opcode.set_rm(rm.try_into().unwrap()); + let x: Option = x.map(|x| x.try_into().map_err(|_| + format!("{x} cannot be converted to unsigned 16 bit"))).transpose()?; + let imm = imm.try_into().map_err(|_|format!("{imm} cannot be converted to unsigned 16 bit"))?; + Ok(FullInstruction { i: opcode, depls: x, imm: Some(imm) }) + }, + // Tecnhically this should be covered already by a previous branch + // but we are placing this here for completion's sake + (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor, Some(reg), None, None, Some(imm)) => { + opcode.set_dir(d as u16); + opcode.set_reg(reg.try_into().unwrap()); + let imm = imm.try_into().map_err(|_|format!("{imm} cannot be converted to unsigned 16 bit"))?; + Ok(FullInstruction { i: opcode, depls: None, imm: Some(imm) }) + }, + (i @ (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor),Some(_),None,None,None) | + (i @ (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor),None,Some(_),_,None) | + (i @ (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor),None,None,None,Some(_)|None) + => Err(format!("{i} requires at least 2 operands!")), + (Mov|Cmp|Test|Add|Adc|Sub|Sbb|And|Or|Xor,_,None,Some(_),_) + => Err(format!("We should never reach this point. 
Contact the developer!")) + } + + } +} + +fn main() { + let _labels = HashMap::::new(); + let line_parser = Regex::new(r#"^(?:\s*([a-zA-Z_0-9]+)\s*:)?\s*(?:(.*?);.*|(.*))$"#).unwrap(); + let stmt_parser = Regex::new(r#"^([a-z]+)(?:\s+([^,]+)(?:\s*,\s*([^,]+))?)?$"#).unwrap(); + let equ_parser = Regex::new(r#"^([a-z_][a-z_0-9]*)\s+equ\s+(.*)$"#).unwrap(); + let mut defines = HashMap::::new(); + let file = std::fs::read_to_string("hello.asm").unwrap(); + let mut _label_queue = Vec::::new(); + let mut any_error1 = false; + let mut any_error2 = false; + let mut any_error3 = false; + let statements = file.lines().enumerate() + .map(|(lineno, line)| (lineno, line_parser.captures(line).unwrap())) + .filter_map(|(lineno, cap)| { + let [_lbl, com, nocom] = cap.iter() + .skip(1) + .map(|m| m.map(|c| c.as_str().trim())) + .collect::>().try_into().unwrap(); + let stmt = com.or(nocom).map(|s| if s.is_empty() {None} else {Some(s.to_lowercase())}).unwrap(); + + if let Some(stmt) = stmt { + if let Some(c) = stmt_parser.captures(&stmt) { + + Some(Statement { + lineno: lineno + 1, + line: stmt.to_owned(), + mnemonic: c.get(1).unwrap().as_str().to_string(), + op1: c.get(2).map(|x| x.as_str().to_string()), + op2: c.get(3).map(|x| x.as_str().to_string()), + }) + } else if let Some(c) = equ_parser.captures(&stmt) { + let key = c.get(1).unwrap().as_str(); + if defines.contains_key(key) { + eprintln!("Redefinition of {key} at line {}\n{}", lineno + 1, stmt); + any_error1 = true; + None + } else { + defines.insert(key.to_string(), c.get(2).unwrap().as_str().to_string()); + None + } + } else { + eprintln!("Syntax error on line {}: Line should be of either format:\n\n op1\n op1, op2\nwhere the mnemonic is only formed out of letters\n{}", lineno + 1, stmt); + any_error1 = true; + None + } + } else { + None + } + }) + .filter_map(|stmt| { + let line = stmt.line.to_owned(); + let lineno = stmt.lineno; + match ParsedStatement::from_statement(stmt) { + Ok(stmt) => Some((line, lineno, stmt)), + 
Err(e) => { + any_error2 = true; + eprintln!("Error at line {}: {}\n{}", lineno, e, line); + None + } + } + }) + .map(|(line, lineno, x)| (line, lineno, x.convert_to_instruction())) + .filter_map(|(line, lineno, instr)| { + + match instr { + Ok(stmt) => Some((line,stmt)), + Err(e) => { + any_error3 = true; + eprintln!("Semantic error at line {}: {}\n{}", lineno, e, line); + None + } + } + }) + .map(|(line, x)| format!("// {line}\n{x}")) + .collect::>(); + if any_error1 || any_error2 || any_error3 {std::process::exit(1)}; + println!("{}", statements.join("\n")); + +} + diff --git a/didasm/src/tokens.rs b/didasm/src/tokens.rs new file mode 100644 index 0000000..d554a96 --- /dev/null +++ b/didasm/src/tokens.rs @@ -0,0 +1,339 @@ +use regex::Regex; +use strum_macros::EnumString; +use std::str::FromStr; +use once_cell::sync::Lazy; +use strum_macros::Display; + +use Register::*; +use Mnemonic::*; + +impl From<&Mnemonic> for u16 { + fn from(value: &Mnemonic) -> Self { + match value { + In|Mov|Inc|Add|Jbe => 0, + Out|Dec|Jb|Jc|Adc => 1, + Pushf|Push|Neg|Jle|Cmp|Sub => 2, + Popf|Pop|Not|Jl|Sbb => 3, + Ret|Call|Shl|Sal|Je|Jz|Test|And => 4, + Iret|Jmp|Shr|Jo|Or => 5, + Hlt|Sar|Js|Xor => 6, + Jpe => 7, + Ja => 8, + Jae|Jnc => 9, + Jg => 10, + Jge => 11, + Jne|Jnz => 12, + Jno => 13, + Jns => 14, + Jpo => 15 + } + } +} +#[derive(Debug, EnumString, Display)] +#[strum(serialize_all = "snake_case")] + +pub enum Mnemonic { + // General instructions + Mov, + Push, + Pop, + Pushf, + Popf, + In, + Out, + // Arithmetic instructions + Add, + Adc, + Inc, + Sub, + Sbb, + Dec, + Neg, + Cmp, + // Logic instructions + Not, + And, + Or, + Xor, + Test, + // Shift instructions + Shl, + Sal, + Shr, + Sar, + // Control flow instructions + Call, + Ret, + Iret, + Jmp, + Hlt, + // Conditional jump instructions + Jbe, + Jb, + Jc, + Jle, + Jl, + Je, + Jz, + Jo, + Js, + Jpe, + Ja, + Jae, + Jnc, + Jg, + Jge, + Jne, + Jnz, + Jno, + Jns, + Jpo +} + +impl From for usize { + fn from(value: Register) -> Self 
/// A value appearing in an operand: either a symbolic name or a number.
#[derive(Debug, Clone)]
pub enum Expr {
    /// Identifier
    Id(String),
    /// Numeric value
    Int(isize),
}

impl FromStr for Expr {
    type Err = String;

    /// Parses a single expression token.
    ///
    /// Accepted forms:
    /// * `0b…` – binary literal,
    /// * `0x…` – hexadecimal literal,
    /// * a token starting with a decimal digit – decimal literal,
    /// * `[a-z_][a-z0-9_]*` – identifier.
    ///
    /// # Errors
    /// Returns a human-readable message when the token is empty, is a
    /// malformed number, or is not a valid identifier.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if let Some(digits) = s.strip_prefix("0b") {
            return isize::from_str_radix(digits, 2)
                .map(Expr::Int)
                .map_err(|e| format!("Invalid binary number '{s}', {e}"));
        }
        if let Some(digits) = s.strip_prefix("0x") {
            return isize::from_str_radix(digits, 16)
                .map(Expr::Int)
                .map_err(|e| format!("Invalid hex number '{s}', {e}"));
        }

        let mut chars = s.chars();
        match chars.next() {
            // Inclusive range: a literal may start with any digit, `9` included.
            Some('0'..='9') => s
                .parse::<isize>()
                .map(Expr::Int)
                .map_err(|e| format!("Invalid number '{s}', {e}")),
            Some('a'..='z' | '_')
                if chars.all(|c| matches!(c, 'a'..='z' | '0'..='9' | '_')) =>
            {
                Ok(Expr::Id(s.to_owned()))
            }
            Some(_) => Err(format!("Invalid identifier '{s}'")),
            None => Err("Expected an expression but found an empty string".to_string()),
        }
    }
}
+ BasedIndexed { + b: Register, + x: Register, + depls: Expr + } +} + +use RegOrExpr::*; +enum RegOrExpr { + Reg(Register), + Expression(Expr) +} + +impl FromStr for RegOrExpr { + type Err = String; + fn from_str(s: &str) -> Result { + if let Ok(reg) = Register::from_str(s) { + Ok(RegOrExpr::Reg(reg)) + } else { + Ok(RegOrExpr::Expression(Expr::from_str(s)?)) + } + } +} + +impl FromStr for Operand { + type Err = String; + fn from_str(s: &str) -> Result { + static CHECK_IF_DOUBLE_INDIRECT: Lazy = Lazy::new(|| { + Regex::new(r"^\[\s*\[\s*(.+?)\s*\]\s*\]$").unwrap() + }); + static CHECK_OUTER_BRACKETS: Lazy = Lazy::new(|| { + Regex::new(r"^\[\s*(.+)\s*\]$").unwrap() + }); + static CHECK_INSIDE_BRACKETS: Lazy = Lazy::new(|| { + Regex::new(r"^([a-z0-9_]+)\s*\+\s*([a-z0-9_]+)\s*(?:\+\s*([a-z0-9_]+)|([+\-]))?$").unwrap() + }); + + + + if let Some(cap) = CHECK_IF_DOUBLE_INDIRECT.captures(s) { + let (_, [op]) = cap.extract(); + if let Ok(_) = Register::from_str(op) { + Err(format!("This indirect addressing syntax cannot be done on registers! 
({op})")) + } else { + match Expr::from_str(op) { + Ok(s) => Ok(Operand::Indirect(s)), + Err(e) => Err(format!("Syntax error on indirect addressing '{op}': {e}")) + } + } + } else if let Some(cap) = CHECK_OUTER_BRACKETS.captures(s) { + let (_, [op]) = cap.extract(); + if let Ok(reg) = Register::from_str(op) { + if matches!(reg, Ra|Rb|Rc|Is) { + Err(format!("{reg} is not supported in indirect register addresing!")) + } else { + Ok(Operand::RegIndirect(reg)) + } + } else if let Ok(expr) = Expr::from_str(op) { + Ok(Operand::Direct(expr)) + } else if let Some(cap) = CHECK_INSIDE_BRACKETS.captures(op) { + let op1 = RegOrExpr::from_str(cap.get(1).unwrap().as_str())?; + let op2 = RegOrExpr::from_str(cap.get(2).unwrap().as_str())?; + let op3 = cap.get(3).map(|x| RegOrExpr::from_str(x.as_str())).transpose()?; + let sign = cap.get(4).map(|x| x.as_str()); + match (op1, op2, op3, sign) { + (Reg( b @ (Ba|Bb) ), Reg( x @ (Xa|Xb) ), None, None) | + (Reg( x @ (Xa|Xb) ), Reg( b @ (Ba|Bb) ), None, None) + => Ok(Operand::RegSum { b, x }), + (Reg( b @ (Ba|Bb) ), Reg( x @ (Xa|Xb) ), Some(Expression(e)), None) | + (Reg( x @ (Xa|Xb) ), Reg( b @ (Ba|Bb) ), Some(Expression(e)), None) | + (Reg( b @ (Ba|Bb) ), Expression(e), Some(Reg( x @ (Xa|Xb) )), None) | + (Reg( x @ (Xa|Xb) ), Expression(e), Some(Reg( b @ (Ba|Bb) )), None) | + (Expression(e), Reg( b @ (Ba|Bb) ), Some(Reg( x @ (Xa|Xb) )), None) | + (Expression(e), Reg( x @ (Xa|Xb) ), Some(Reg( b @ (Ba|Bb) )), None) + => Ok(Operand::BasedIndexed { b, x, depls: e }), + (Reg( b @ (Ba|Bb) ), Reg( x @ (Xa|Xb) ), None, Some("+")) | + (Reg( x @ (Xa|Xb) ), Reg( b @ (Ba|Bb) ), None, Some("+")) + => Ok(Operand::RegSumAutoincrement { b, x }), + (Reg( b @ (Ba|Bb) ), Reg( Xa ), None, Some("-")) | + (Reg( Xa ), Reg( b @ (Ba|Bb) ), None, Some("-")) + => Ok(Operand::RegSumAutodecrement { b, x: Xa }), + (Reg(_), Reg( Xb ), None, Some("-")) | + (Reg( Xb ), Reg(_), None, Some("-")) + => Err(format!("xb is not supported in autodecrement instructions")), + 
(Reg( b @ (Ba|Bb) ), Expression(e), None, None) | + (Expression(e), Reg( b @ (Ba|Bb) ), None, None) + => Ok(Operand::Based { b, depls: e }), + (Reg( x @ (Xa|Xb) ), Expression(e), None, None) | + (Expression(e), Reg( x @ (Xa|Xb) ), None, None) + => Ok(Operand::Indexed { x, depls: e }), + + + + + (Reg(_), Reg(_), Some(Reg(_)), _) + => Err(format!("Sum between 3 registers is not supported by the ISA")), + (Reg(Ba|Bb), Reg(Ba|Bb), _, _) | (Reg(Xa|Xb), Reg(Xa|Xb), _, _) | + (_, Reg(Ba|Bb), Some(Reg(Ba|Bb)), _) | (_, Reg(Xa|Xb), Some(Reg(Xa|Xb)), _) | + (Reg(Ba|Bb), _, Some(Reg(Ba|Bb)), _) | (Reg(Xa|Xb), _, Some(Reg(Xa|Xb)), _) + => Err(format!("Using the same register group (B/X) between 2 registers is not allowed in an operand")), + (Expression(_), Expression(_), _, _) | + (Expression(_),_,Some(Expression(_)), _) | + (_, Expression(_), Some(Expression(_)), _) + => Err(format!("Sum between 2 or more expressions is not supported by the ISA")), + (Reg(x @ (Ra|Rb|Rc|Is)), _, _, _) | + (_, Reg(x @ (Ra|Rb|Rc|Is)), _, _) | + (_, _, Some(Reg(x @ (Ra|Rb|Rc|Is))), _) + => Err(format!("Register sum syntax does not support register {x}")), + _ => Err(format!("Something went wrong! Report the given instruction to the developer for more specific error messages!")) + } + // Err("".to_string()) + } else { + Err(format!("Syntax error between brackets: '{s}'")) + } + } else { + match RegOrExpr::from_str(s)? { + Reg(r) => Ok(Operand::Reg(r)), + Expression(e) => Ok(Operand::Imm(e)) + } + } + } +} \ No newline at end of file