diff --git a/controller.py b/controller.py index ab35585..8effdc0 100755 --- a/controller.py +++ b/controller.py @@ -247,7 +247,7 @@ def controller( num_workers, queuedepth, compressionlevel, - qemu_output, + engine_output, goldenrun=True, logger=hdf5collector, qemu_pre=None, @@ -283,7 +283,7 @@ def controller( goldenrun_data, faultlist, ] = run_goldenrun( - config_qemu, qemu_output, queue_output, faultlist, qemu_pre, qemu_post + config_qemu, engine_output, queue_output, faultlist, qemu_pre, qemu_post ) pickle.dump( ( @@ -376,6 +376,7 @@ def controller( config_qemu, faults["index"], queue_output, + engine_output, pregoldenrun_data, goldenrun_data, True, @@ -390,7 +391,7 @@ def controller( config_qemu, faults["index"], queue_output, - qemu_output, + engine_output, goldenrun_data, True, queue_ram_usage, @@ -673,7 +674,7 @@ def process_arguments(args): parguments["num_workers"], # num_workers parguments["queuedepth"], # queuedepth parguments["compressionlevel"], # compressionlevel - args.debug, # qemu_output + args.debug, # engine_output parguments["goldenrun"], # goldenrun hdf5collector, # logger None, # qemu_pre diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index 15d0e00..8eaec9d 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -60,9 +60,12 @@ name = "emulation_worker" version = "0.1.0" dependencies = [ "capstone", + "log", "num", "priority-queue", "pyo3", + "simplelog", + "time", "unicorn-engine", ] @@ -88,6 +91,12 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780" +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + [[package]] name = "libc" version = "0.2.139" @@ -104,6 +113,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -189,6 +207,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.17.0" @@ -327,6 +354,23 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" + +[[package]] +name = "simplelog" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acee08041c5de3d5048c8b3f6f13fafb3026b24ba43c6a695a0c76179b844369" +dependencies = [ + "log", + "termcolor", + "time", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -350,6 +394,44 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d" +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "time" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +dependencies = [ + "time-core", +] + [[package]] name = "unicode-ident" version = "1.0.6" @@ -375,6 +457,37 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.42.0" diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml index 4935506..8b89aaf 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -14,3 +14,6 @@ unicorn-engine = "2.0.0" num = "0.4.0" priority-queue = "1.3.1" capstone = "0.11.0" +log = "0.4.17" +simplelog = "0.12.1" +time = "0.3.20" diff --git a/emulation_worker/src/architecture.rs b/emulation_worker/src/architecture.rs index 93f01da..4bb88e0 100644 --- a/emulation_worker/src/architecture.rs +++ b/emulation_worker/src/architecture.rs @@ -1,3 +1,4 @@ +use capstone::{prelude::BuildsCapstone, Capstone}; use pyo3::types::PyDict; use std::collections::HashMap; use std::sync::RwLockWriteGuard; @@ -71,6 +72,7 @@ pub enum Architecture { pub trait ArchitectureDependentOperations { fn initialize_unicorn(&self) -> Unicorn<()>; + fn initialize_cs_engine(&self) -> Capstone; fn initialize_registers( &self, uc: &mut Unicorn<()>, @@ -127,6 +129,21 @@ impl ArchitectureDependentOperations for ArchitectureDependentOperator { } } + fn initialize_cs_engine(self: &ArchitectureDependentOperator) -> Capstone { + match self.architecture { + Architecture::Arm => Capstone::new() + .arm() + .mode(capstone::arch::arm::ArchMode::Thumb) + .build() + .unwrap(), + Architecture::Riscv => Capstone::new() + .riscv() + .mode(capstone::arch::riscv::ArchMode::RiscV64) + .build() + .unwrap(), + } + } + fn dump_registers( self: &ArchitectureDependentOperator, uc: &mut Unicorn<()>, diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 250b967..a469900 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -1,3 +1,4 @@ +use log::{debug, info}; use num::{BigUint, ToPrimitive}; use pyo3::types::{PyDict, PyList}; use std::{collections::HashMap, io, sync::Arc}; @@ -70,15 +71,15 @@ fn end_hook_cb( let counter = endpoints.get_mut(&address).unwrap(); if *counter > 1 { - println!( - "Decreasing endpoint counter for {:?} to {:?}", + debug!( + "Decreasing endpoint counter for 0x{:x} to {:?}", address, *counter ); *counter -= 1; } else { let mut endpoint = state.logs.endpoint.write().unwrap(); *endpoint = (address == first_endpoint, address, 1); - println!("Reached endpoint at {address:?}"); + info!("Reached endpoint at 0x{address:x}"); // Since this hook has been registered before the single step hook we need to call it // manullay to log the last instruction, since the callback would not be called otherwise @@ -102,6 +103,11 @@ fn end_hook_cb( } fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { + debug!( + "Single step\taddr 0x{:x}\tinstructions {:?}", + address, + uc.mem_read_as_vec(address, size as usize).unwrap() + ); let mut instruction_count = state.instruction_count.write().unwrap(); let undone_fault = undo_faults( uc, @@ -111,7 +117,7 @@ fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: ); if let Some(fault) = undone_fault { - if matches!(fault.kind, FaultType::Register) { + if !matches!(fault.kind, FaultType::Register) { dump_memory( uc, address, @@ -152,10 +158,10 @@ fn mem_hook_cb( meminfo.insert( (address, pc), MemInfo { - ins: address, + ins: pc, counter: 1, direction: if mem_type == MemType::READ { 0 } else { 1 }, - address: pc, + address, tbid: last_tbid, size, }, @@ -177,7 +183,7 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc return; } - println!("Executing fault at {address:?}"); + info!("Executing fault at 0x{address:x}"); let prefault_data; @@ -190,8 +196,8 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc .as_slice(), ); prefault_data = data.clone(); - println!( - "Overwriting {:?} with {:?}", + debug!( + "Overwriting 0x{:x} with 0x{:x}", data, apply_model(&data, fault) ); diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index c2b675a..d45c253 100644 --- a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -1,5 +1,6 @@ use crate::structs::{TbExecEntry, TbInfoBlock}; use crate::{Fault, FaultModel, FaultType, MemDump}; +use log::debug; use num::{BigUint, ToPrimitive}; use priority_queue::PriorityQueue; use std::collections::HashMap; @@ -85,7 +86,7 @@ pub fn undo_faults( let fault = faults.get(&address).unwrap(); - println!("Undoing fault"); + debug!("Undoing fault"); match fault.kind { FaultType::Register => { uc.reg_write(fault.address as i32, prefault_data.to_u64().unwrap()) diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index fe78673..8622012 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -1,4 +1,4 @@ -use capstone::{prelude::BuildsCapstone, Capstone}; +use log::{debug, error, info, warn, LevelFilter}; use priority_queue::PriorityQueue; use pyo3::{ prelude::*, @@ -6,6 +6,7 @@ use pyo3::{ }; use std::collections::HashMap; use std::sync::{Arc, RwLock}; +use time::macros::format_description; use unicorn_engine::unicorn_const::Permission; @@ -20,12 +21,40 @@ use crate::structs::{Fault, FaultModel, FaultType, Logs, MemDump, MemInfo, State mod hooks; use crate::hooks::initialize_hooks; +fn setup_logging(index: u64, debug: bool) { + let config = simplelog::ConfigBuilder::new() + .set_time_format_custom(format_description!( + "[year]-[month]-[day] [hour]:[minute]:[second],[subsecond digits:3]" + )) + .build(); + let mut loggers: Vec> = Vec::new(); + if debug { + loggers.push(simplelog::WriteLogger::new( + LevelFilter::Debug, + config.clone(), + std::fs::File::create(format!("log_{index:?}.txt")).unwrap(), + )); + } + let log_level = if debug { + LevelFilter::Debug + } else { + LevelFilter::Info + }; + loggers.push(simplelog::SimpleLogger::new(log_level, config)); + + simplelog::CombinedLogger::init(loggers).unwrap(); +} + #[pyfunction] fn run_unicorn( pregoldenrun_data: &PyDict, faults: Vec, config: &PyDict, + index: u64, + engine_output: bool, ) -> PyResult { + setup_logging(index, engine_output); + let arch_str = pregoldenrun_data .get_item("architecture") .unwrap() @@ -64,7 +93,11 @@ fn run_unicorn( let memmap: &PyDict = obj.extract()?; let address: u64 = memmap.get_item("address").unwrap().extract()?; let length: usize = memmap.get_item("length").unwrap().extract()?; - //println!("Mapping memory at {:x} size {:x}", address & (u64::MAX ^0xfff), cmp::max(length, 0x1000)); + debug!( + "Mapping memory at 0x{:x} size 0x{:x}", + address & (u64::MAX ^ 0xfff), + usize::max(length, 0x1000) + ); match emu.mem_map( address & (u64::MAX ^ 0xfff), usize::max(length, 0x1000), @@ -72,10 +105,10 @@ fn run_unicorn( ) { Ok(()) => {} Err(unicorn_engine::unicorn_const::uc_error::MAP) => { - println!("Memory space is already mapped. Ignoring...") + warn!("Memory space is already mapped. Ignoring...") } Err(unicorn_engine::unicorn_const::uc_error::NOMEM) => { - println!("Memory space too big, cannot allocate. Ignoring...") + warn!("Memory space too big, cannot allocate. Ignoring...") } Err(err) => panic!("failed mapping memory: {err:?}"), } @@ -90,9 +123,10 @@ fn run_unicorn( let address: u64 = memdump.get_item("address").unwrap().extract()?; let dumps: &PyList = memdump.get_item("dumps").unwrap().extract()?; let dump: Vec = dumps.get_item(0).unwrap().extract()?; + debug!("writing {:?} bytes to 0x{:x}", dump.len(), address); emu.mem_write(address, dump.as_slice()) - .expect("failed to write instructions"); + .unwrap_or_else(|_| error!("failed to write dumped data at 0x{:X}", address)); } let logs = Logs { @@ -112,11 +146,7 @@ fn run_unicorn( live_faults: RwLock::new(PriorityQueue::new()), instruction_count: RwLock::new(0), single_step_hook_handle: RwLock::new(None), - cs_engine: Capstone::new() - .arm() - .mode(capstone::arch::arm::ArchMode::Thumb) - .build() - .unwrap(), + cs_engine: arch_operator.initialize_cs_engine(), arch_operator: arch_operator.clone(), logs, }; @@ -130,8 +160,19 @@ fn run_unicorn( .unwrap() .extract()?; + info!("Starting emulation at 0x{:x}", start_address); emu.emu_start(start_address, 0, 0, max_instruction_count) - .unwrap_or_else(|_| println!("failed to emulate code at 0x{:x}", emu.pc_read().unwrap())); + .unwrap_or_else(|_| error!("failed to emulate code at 0x{:x}", emu.pc_read().unwrap())); + + { + let state = Arc::clone(&state_arc); + state.arch_operator.dump_registers( + emu, + state.logs.registerlist.write().unwrap(), + *state.tbcounter.read().unwrap(), + ); + } + info!("Finished emulation"); Python::with_gil(|py| Ok(state_arc.logs.to_object(py))) } diff --git a/emulation_worker/src/structs.rs b/emulation_worker/src/structs.rs index adbcfda..6ad3c22 100644 --- a/emulation_worker/src/structs.rs +++ b/emulation_worker/src/structs.rs @@ -173,7 +173,7 @@ impl ToPyObject for Logs { let endpoint = self.endpoint.read().unwrap(); if endpoint.2 == 1 { - dict.set_item("end_reason", format!("{}/1", endpoint.1)) + dict.set_item("end_reason", format!("endpoint {}/1", endpoint.1)) .unwrap(); } else { dict.set_item("end_reason", "max tb").unwrap(); diff --git a/faultclass.py b/faultclass.py index 116aaca..2756782 100644 --- a/faultclass.py +++ b/faultclass.py @@ -726,7 +726,7 @@ def python_worker( config_qemu, index, queue_output, - qemu_output, + engine_output, goldenrun_data=None, change_nice=False, queue_ram_usage=None, @@ -760,7 +760,7 @@ def python_worker( paths["config"], paths["data"], config_qemu, - qemu_output, + engine_output, index, qemu_custom_paths, ), @@ -817,6 +817,7 @@ def python_worker_unicorn( config_qemu, index, queue_output, + engine_output, pregoldenrun_data, goldenrun_data, change_nice=False, @@ -825,8 +826,8 @@ def python_worker_unicorn( if change_nice: os.nice(19) - logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu) - logger.info(f"Ended qemu for exp {index}! Took {time.time() - t0}") + logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu, index, engine_output) + logger.info(f"Ended unicorn for exp {index}! Took {time.time() - t0}") output = {} @@ -835,6 +836,7 @@ def python_worker_unicorn( output["endpoint"] = logs["endpoint"] output["end_reason"] = logs["end_reason"] output["memdumplist"] = logs["memdumplist"] + output["meminfo"] = logs["meminfo"] pdtbexeclist = pd.DataFrame(logs["tbexec"]) [pdtbexeclist, tblist] = filter_tb(