From 7b5158b802bfc0ea355944d4ad66883380e3747e Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 27 Jan 2023 18:12:46 +0100 Subject: [PATCH 01/13] add full memory and memory map dump option to faultplugin --- faultclass.py | 6 ++++ faultplugin/Makefile | 5 ++- faultplugin/faultplugin.c | 58 +++++++++++++++++++++++++++++++++ faultplugin/memmapdump.c | 67 +++++++++++++++++++++++++++++++++++++++ faultplugin/memmapdump.h | 31 ++++++++++++++++++ qemu | 2 +- 6 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 faultplugin/memmapdump.c create mode 100644 faultplugin/memmapdump.h diff --git a/faultclass.py b/faultclass.py index 168cc15..bf8165e 100644 --- a/faultclass.py +++ b/faultclass.py @@ -438,6 +438,7 @@ def readout_data( max_ram_usage = 0 regtype = None tbfaulted = 0 + architecture = "" while 1: line = pipe.readline() @@ -470,6 +471,10 @@ def readout_data( state = "memdump" memdump = 1 + elif "[Architecture]" in line: + split = line.split("]:") + architecture = split[1].strip() + elif "[END]" in line: state = "none" logger.info( @@ -530,6 +535,7 @@ def readout_data( output["faultlist"] = faultlist output["endpoint"] = endpoint output["end_reason"] = end_reason + output["architecture"] = architecture if memdump == 1: output["memdumplist"] = memdumplist diff --git a/faultplugin/Makefile b/faultplugin/Makefile index 23dafe7..9c63a5d 100644 --- a/faultplugin/Makefile +++ b/faultplugin/Makefile @@ -62,10 +62,13 @@ tb_exec_data_collection.o: tb_exec_data_collection.c tb_faulted_collection.o : tb_faulted_collection.c $(CC) $(CFLAGS) -c -o $@ $< +memmapdump.o : memmapdump.c + $(CC) $(CFLAGS) -c -o $@ $< + %.o: %.c $(CC) $(CFLAGS) -c -o $@ $< -lib%.so: %.o lib/avl.o faultdata.o registerdump.o singlestep.o fault_list.o fault_injection.o tb_info_data_collection.o tb_exec_data_collection.o tb_faulted_collection.o +lib%.so: %.o lib/avl.o faultdata.o registerdump.o singlestep.o fault_list.o fault_injection.o tb_info_data_collection.o 
tb_exec_data_collection.o tb_faulted_collection.o memmapdump.o $(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS) clean: diff --git a/faultplugin/faultplugin.c b/faultplugin/faultplugin.c index 588d424..1796c0d 100644 --- a/faultplugin/faultplugin.c +++ b/faultplugin/faultplugin.c @@ -40,6 +40,7 @@ #include "tb_info_data_collection.h" #include "tb_exec_data_collection.h" #include "tb_faulted_collection.h" +#include "memmapdump.h" //DEBUG #include #include @@ -95,6 +96,11 @@ int tb_info_enabled; int tb_exec_order_enabled; +int full_mem_dump_enabled; + + +int memmap_dump_enabled; + /* data structures for memory access */ @@ -811,6 +817,14 @@ void plugin_dump_mem_information() } } +void memmap_dump_register_page(void *key, void *value, void *user_data) { + g_autoptr(GString) out = g_string_new(""); + g_string_printf(out, "Dumping page at %p\n", *(uint64_t*)key); + qemu_plugin_outs(out->str); + insert_memorydump_config(*(uint64_t*)key, 0x1000); + free(key); +} + /** * plugin_end_information_dump * @@ -819,6 +833,27 @@ void plugin_dump_mem_information() */ void plugin_end_information_dump(GString *end_reason) { + if (memmap_dump_enabled) { + read_memmap_information_module(); + } + + if (full_mem_dump_enabled) { + g_autoptr(GHashTable) accessed_pages = g_hash_table_new(g_int64_hash, g_int64_equal); + + struct mem_info_t *item = mem_info_list; + while(item != NULL) + { + if (item->direction == 1) { + uint64_t *page_address = malloc(sizeof(uint64_t)); + *page_address = item->memmory_address & 0xfffffffffffff000; + g_hash_table_add(accessed_pages, page_address); + } + item = item->next; + } + + g_hash_table_foreach(accessed_pages, memmap_dump_register_page, NULL); + } + int *error = NULL; if(end_point->location.trignum == 4) { @@ -1207,6 +1242,26 @@ int readout_control_config(GString *conf) tb_info_enabled = 0; return 1; } + if(strstr(conf->str, "enable_memmap_dump")) + { + memmap_dump_enabled = 1; + return 1; + } + if(strstr(conf->str, "disable_memmap_dump")) + { + 
memmap_dump_enabled = 0; + return 1; + } + if(strstr(conf->str, "enable_full_mem_dump")) + { + full_mem_dump_enabled = 1; + return 1; + } + if(strstr(conf->str, "disable_full_mem_dump")) + { + full_mem_dump_enabled = 0; + return 1; + } if(strstr(conf->str, "enable_tb_exec_list")) { tb_exec_order_enabled = 1; @@ -1422,6 +1477,9 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, g_string_append(out, "Done\n"); g_string_append_printf(out, "[Start]: Reached end of initialisation, starting guest now\n"); qemu_plugin_outs(out->str); + + g_string_printf(out, "$$$[Architecture]:%s\n", info->target_name); + plugin_write_to_data_pipe(out->str, out->len); return 0; ABORT: if(mem_avl_root != NULL) diff --git a/faultplugin/memmapdump.c b/faultplugin/memmapdump.c new file mode 100644 index 0000000..6648b61 --- /dev/null +++ b/faultplugin/memmapdump.c @@ -0,0 +1,67 @@ +/* + * Copyright 2023 Kevin Schneider + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * This file contains all functions needed to collect memory map data and send + * it over the data pipe + */ +#include "memmapdump.h" + +typedef struct AddrRange AddrRange; +struct AddrRange { + Int128 start; + Int128 size; +}; + +struct FlatRange { + MemoryRegion *mr; + hwaddr offset_in_region; + AddrRange addr; + uint8_t dirty_log_mask; + bool romd_mode; + bool readonly; + bool nonvolatile; +}; + +bool dump_memmap_information(Int128 start, Int128 len, const MemoryRegion *mr, hwaddr offset_in_region, void *opaque) { + g_autoptr(GString) out = g_string_new(""); + g_string_printf(out, "$$ 0x%lx | 0x%lx \n", int128_get64(start), int128_get64(len)); + plugin_write_to_data_pipe(out->str, out->len); + return false; +} + +void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque) +{ + FlatRange *fr; + + assert(fv); + assert(cb); + + for (fr = fv->ranges; fr < fv->ranges + fv->nr; ++fr) { + if (cb(fr->addr.start, fr->addr.size, fr->mr, + fr->offset_in_region, opaque)) { + break; + } + } +} + +void read_memmap_information_module(void) +{ + g_autoptr(GString) out = g_string_new(""); + g_string_printf(out, "$$$[Memory Map]\n"); + plugin_write_to_data_pipe(out->str, out->len); + + AddressSpace *addr_space = qemu_plugin_get_address_space(); + flatview_for_each_range(address_space_to_flatview(addr_space), dump_memmap_information, NULL); +} diff --git a/faultplugin/memmapdump.h b/faultplugin/memmapdump.h new file mode 100644 index 0000000..6733170 --- /dev/null +++ b/faultplugin/memmapdump.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Kevin Schneider + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file contains the function declarations for dumping the memory map. + */ + +#ifndef FAULTPLUGIN_MEMMAPDUMP_H +#define FAULTPLUGIN_MEMMAPDUMP_H + +#include "qemu/osdep.h" +#include + +#include "exec/memory.h" + +#include "faultplugin.h" + +void read_memmap_information_module(void); + +#endif diff --git a/qemu b/qemu index 1ab3c79..f8f8d90 160000 --- a/qemu +++ b/qemu @@ -1 +1 @@ -Subproject commit 1ab3c799b6fd23da29eb41a3accf8d053ee2d9cc +Subproject commit f8f8d909a503b967e35f2931acf0c44a73dc4847 From dbd8b6715c08fab9acdf8c71772756ae1f5b1753 Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 27 Jan 2023 18:15:28 +0100 Subject: [PATCH 02/13] dump full memory state after pregoldenrun and pass to unicorn worker --- README.md | 13 +++++++++-- controller.py | 65 +++++++++++++++++++++++++++++++++++++-------------- faultclass.py | 25 +++++++++++++++++--- goldenrun.py | 2 +- 4 files changed, 81 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 90cbeba..e5b7924 100644 --- a/README.md +++ b/README.md @@ -40,11 +40,15 @@ mkdir -p qemu/build/debug cd qemu/build/debug ./../../configure --target-list=arm-softmmu --enable-debug --enable-plugins --disable-sdl --disable-gtk --disable-curses --disable-vnc make -j {CPUCORENUMBER} -cd ../../../faultplugin/ +cd - +cd faultplugin make +cd ../emulation_worker +cargo build --release +cp target/release/libemulation_worker.so ../emulation_worker.so ``` -With this, *archie-qemu* is build in qemu/build/debug/ and the plugin is build in *faultplugin/* +With this, *archie-qemu* is built in 
qemu/build/debug/, the plugin is built in *faultplugin/* and the unicorn emulation worker is built and moved to the project's root directory. If you change the build directory for *archie-qemu*, please change the path in the [Makefile](faultplugin/Makefile) in the *faultplugin/* folder for building the plugin. ## In [archie](https://github.com/Fraunhofer-AISEC/archie) @@ -105,3 +109,8 @@ targ rem:localhost:1234 ``` QEMU will wait unil the GDB session is attached. The debugging mode is only suitable for the analysis of a low number of faults. Stepping through a large amount of faults is cumbersome. This should be considered when adjusting the JSON files. +#### Unicorn Engine + +Instead of QEMU, the unicorn engine can be used for emulating the experiments. This feature can be used interchangeably with the QEMU emulation without the need to adjust any of the configuration files. The one exception is register faults, whose target addresses differ between QEMU and unicorn. The mapping for the registers can be looked up in the source code of unicorn's [Rust bindings](https://github.com/unicorn-engine/unicorn/tree/master/bindings/rust/src). To enable this feature, set the *--unicorn* flag. + +Using the unicorn engine can result in a substantial increase in performance. However, this mode is not capable of emulating any features related to the hardware of the target platform, such as interrupts or communication with devices. 
diff --git a/controller.py b/controller.py index 661915d..eba46a5 100755 --- a/controller.py +++ b/controller.py @@ -21,7 +21,7 @@ pass from faultclass import Fault, Trigger -from faultclass import python_worker +from faultclass import python_worker, python_worker_unicorn from hdf5logger import hdf5collector from goldenrun import run_goldenrun @@ -251,6 +251,7 @@ def controller( qemu_pre=None, qemu_post=None, logger_postprocess=None, + unicorn_emulation=False ): """ This function builds the unrolled fault structure, performs golden run and @@ -271,22 +272,25 @@ def controller( # Storing and restoring goldenrun_data with pickle is a temporary fix # A better solution is to parse the goldenrun_data from the existing hdf5 file + pregoldenrun_data = {} goldenrun_data = {} if goldenrun: [ config_qemu["max_instruction_count"], + pregoldenrun_data, goldenrun_data, faultlist, ] = run_goldenrun( config_qemu, qemu_output, queue_output, faultlist, qemu_pre, qemu_post ) pickle.dump( - (config_qemu["max_instruction_count"], goldenrun_data, faultlist), + (config_qemu["max_instruction_count"], pregoldenrun_data, goldenrun_data, faultlist), lzma.open("bkup_goldenrun_results.xz", "wb"), ) else: ( config_qemu["max_instruction_count"], + pregoldenrun_data, goldenrun_data, faultlist, ) = pickle.load(lzma.open("bkup_goldenrun_results.xz", "rb")) @@ -341,22 +345,38 @@ def controller( faults = faultlist[itter] itter += 1 - p = Process( - name=f"worker_{faults['index']}", - target=python_worker, - args=( - faults["faultlist"], - config_qemu, - faults["index"], - queue_output, - qemu_output, - goldenrun_data, - True, - queue_ram_usage, - qemu_pre, - qemu_post, - ), - ) + if unicorn_emulation: + p = Process( + name=f"worker_{faults['index']}", + target=python_worker_unicorn, + args=( + faults["faultlist"], + config_qemu, + faults["index"], + queue_output, + pregoldenrun_data, + goldenrun_data, + True, + ), + ) + else: + p = Process( + name=f"worker_{faults['index']}", + target=python_worker, 
+ args=( + faults["faultlist"], + config_qemu, + faults["index"], + queue_output, + qemu_output, + goldenrun_data, + True, + queue_ram_usage, + qemu_pre, + qemu_post, + ), + ) + p.start() p_list.append({"process": p, "start_time": time.time()}) @@ -498,6 +518,12 @@ def get_argument_parser(): help="Enables connection to the target with gdb. Port 1234", required=False, ) + parser.add_argument( + "--unicorn", + action="store_true", + help="Enables emulation through unicorn engine instead of QEMU", + required=False, + ) return parser @@ -526,6 +552,8 @@ def process_arguments(args): if args.compressionlevel is None: parguments["compressionlevel"] = 1 + parguments["unicorn_emulation"] = args.unicorn + hdf5file = Path(args.hdf5file) if hdf5file.parent.exists() is False: print( @@ -629,4 +657,5 @@ def process_arguments(args): None, # qemu_pre None, # qemu_post None, # logger_postprocess + parguments["unicorn_emulation"], # enable unicorn emulation ) diff --git a/faultclass.py b/faultclass.py index bf8165e..c26dcfb 100644 --- a/faultclass.py +++ b/faultclass.py @@ -5,6 +5,7 @@ import pandas as pd import prctl import shlex +from emulation_worker import run_unicorn import logging @@ -639,7 +640,7 @@ def delete_fifos(): os.rmdir(path) -def configure_qemu(control, config_qemu, num_faults, memorydump_list): +def configure_qemu(control, config_qemu, num_faults, memorydump_list, index): """ Function to write commands and configuration needed to start qemu plugin """ @@ -647,6 +648,11 @@ def configure_qemu(control, config_qemu, num_faults, memorydump_list): out = out + "$$ max_duration: {}\n".format(config_qemu["max_instruction_count"]) out = out + "$$ num_faults: {}\n".format(num_faults) + if index is -2: + out = out + "$$enable_full_mem_dump\n" + else: + out = out + "$$disable_full_mem_dump\n" + if "tb_exec_list" in config_qemu: if config_qemu["tb_exec_list"] is False: out = out + "$$disable_tb_exec_list\n" @@ -674,7 +680,7 @@ def configure_qemu(control, config_qemu, num_faults, 
memorydump_list): out = out + "$$ end_address: {}\n".format(end_loc["address"]) out = out + "$$ end_counter: {}\n".format(end_loc["counter"]) - if memorydump_list is not None: + if index != -2 and memorydump_list is not None: out = out + "$$num_memregions: {}\n".format(len(memorydump_list)) out = out + "$$$[Memory]\n" for memorydump in memorydump_list: @@ -748,7 +754,7 @@ def python_worker( else: memorydump = None logger.debug("Start configuring") - configure_qemu(control_fifo, config_qemu, len(fault_list), memorydump) + configure_qemu(control_fifo, config_qemu, len(fault_list), memorydump, index) enable_qemu(control_fifo) logger.debug("Started QEMU") """Write faults to config pipe""" @@ -782,3 +788,16 @@ def python_worker( p_qemu.terminate() p_qemu.join() logger.warning("Terminate Worker {}".format(index)) + + +def python_worker_unicorn( + fault_list, + config_qemu, + index, + queue_output, + pregoldenrun_data, + goldenrun_data, + change_nice=False, +): + run_unicorn(pregoldenrun_data, config_qemu) + return diff --git a/goldenrun.py b/goldenrun.py index 1f626a3..f6df31f 100644 --- a/goldenrun.py +++ b/goldenrun.py @@ -99,7 +99,7 @@ def run_goldenrun( ) ) - return [config_qemu["max_instruction_count"], experiment["data"], faultconfig] + return [config_qemu["max_instruction_count"], experiments[0]["data"], experiment["data"], faultconfig] def find_insn_addresses_in_tb(insn_address, data): From f77f054c3ffb7cf4d1c8dbd46ccd078325561faf Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 27 Jan 2023 18:16:31 +0100 Subject: [PATCH 03/13] add unicorn emulation worker with meminfo logging and pregoldenrun initialization --- .github/workflows/build.yml | 8 + .github/workflows/lint.yml | 19 ++ build.sh | 8 +- emulation_worker/.github/workflows/CI.yml | 70 +++++ emulation_worker/.gitignore | 72 +++++ emulation_worker/Cargo.lock | 308 ++++++++++++++++++++++ emulation_worker/Cargo.toml | 13 + emulation_worker/pyproject.toml | 14 + emulation_worker/src/arm.rs | 85 ++++++ 
emulation_worker/src/hooks.rs | 105 ++++++++ emulation_worker/src/lib.rs | 83 ++++++ emulation_worker/src/logs.rs | 50 ++++ faultclass.py | 18 +- 13 files changed, 851 insertions(+), 2 deletions(-) create mode 100644 emulation_worker/.github/workflows/CI.yml create mode 100644 emulation_worker/.gitignore create mode 100644 emulation_worker/Cargo.lock create mode 100644 emulation_worker/Cargo.toml create mode 100644 emulation_worker/pyproject.toml create mode 100644 emulation_worker/src/arm.rs create mode 100644 emulation_worker/src/hooks.rs create mode 100644 emulation_worker/src/lib.rs create mode 100644 emulation_worker/src/logs.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 65c08d1..c3e4ca7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,6 +20,11 @@ jobs: - name: install packages run: sudo apt update; sudo apt upgrade -y; sudo apt install -y build-essential ninja-build libglib2.0-dev libfdt-dev libpixman-1-dev zlib1g-dev python3-tables python3-pandas python3-prctl python3-json5 + - name: Install latest stable Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - name: Checkout submodules run: git submodule update --init @@ -29,5 +34,8 @@ jobs: - name: Build Faultplugin run: cd faultplugin; make -j; echo "done" + - name: Build Emulation Worker + run: cd emulation_worker; cargo build --release; mv target/release/libemulation_worker.so ../emulation_worker.so; echo "done" + - name: Run ARCHIE run: cd examples/stm32; ./run.sh; cd ../riscv64; ./run.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index bef026f..153a6db 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,3 +24,22 @@ jobs: - run: | black --version black --check --diff *.py analysis/*.py + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + 
override: true + components: rustfmt, clippy + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all --manifest-path ./emulation_worker/Cargo.toml -- --check + - run: | + cd emulation_worker + cargo clippy -- -D warnings diff --git a/build.sh b/build.sh index 68ad777..6ae5722 100755 --- a/build.sh +++ b/build.sh @@ -97,6 +97,12 @@ cd ../../../faultplugin/ make clean && make cd .. +echo "Building emulation worker" +cd emulation_worker +cargo build --release +cp target/release/libemulation_worker.so ../emulation_worker.so +cd - + echo "Test ARCHIE" cd examples/stm32 ./run.sh @@ -112,4 +118,4 @@ select yn in "YES" "NO"; do esac echo "Please type the number corresponding to Yes or No" done -echo "Archie was build and tested successfully" +echo "Archie was built and tested successfully" diff --git a/emulation_worker/.github/workflows/CI.yml b/emulation_worker/.github/workflows/CI.yml new file mode 100644 index 0000000..074743e --- /dev/null +++ b/emulation_worker/.github/workflows/CI.yml @@ -0,0 +1,70 @@ +name: CI + +on: + push: + branches: + - main + - master + pull_request: + workflow_dispatch: + +jobs: + linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + manylinux: auto + command: build + args: --release --sdist -o dist --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + command: build + args: --release -o dist --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + command: build + args: --release -o dist --universal2 --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + release: 
+ name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [ macos, windows, linux ] + steps: + - uses: actions/download-artifact@v3 + with: + name: wheels + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --skip-existing * \ No newline at end of file diff --git a/emulation_worker/.gitignore b/emulation_worker/.gitignore new file mode 100644 index 0000000..af3ca5e --- /dev/null +++ b/emulation_worker/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version \ No newline at end of file diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock new file mode 100644 index 0000000..f260729 --- /dev/null +++ b/emulation_worker/Cargo.lock @@ -0,0 +1,308 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cmake" +version = "0.1.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c" +dependencies = [ + "cc", +] + +[[package]] +name = "emulation_worker" +version = "0.1.0" +dependencies = [ + "pyo3", + "unicorn-engine", +] + +[[package]] +name = "indoc" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780" + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + 
"autocfg", +] + +[[package]] +name = "once_cell" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "pkg-config" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "268be0c73583c183f2b14052337465768c07726936a260f480f0857cb95ba543" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28fcd1e73f06ec85bf3280c48c67e731d8290ad3d730f8be9dc07946923005c8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0f6cb136e222e49115b3c51c32792886defbfb0adead26a688142b346a0b9ffc" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94144a1266e236b1c932682136dc35a9dee8d3589728f68130c7c3861ef96b28" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8df9be978a2d2f0cdebabb03206ed73b11314701a5bfe71b0d753b81997777f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d" + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "unicorn-engine" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3b881bfd9837ff4f62e81a1e64b40a584604375ae0a73d0d5f09b7a72350b96" +dependencies = [ + "bitflags", + "cc", + "cmake", + "libc", + "pkg-config", +] + +[[package]] +name = "unindent" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + 
+[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml new file mode 100644 index 0000000..cb3adc3 --- /dev/null +++ b/emulation_worker/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "emulation_worker" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "emulation_worker" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.17.3", features = ["extension-module"] } +unicorn-engine = "2.0.0" diff --git a/emulation_worker/pyproject.toml b/emulation_worker/pyproject.toml new file mode 100644 index 0000000..02dfebe --- /dev/null +++ b/emulation_worker/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = ["maturin>=0.14,<0.15"] +build-backend = "maturin" + +[project] +name = "emulation_worker" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + + diff --git a/emulation_worker/src/arm.rs b/emulation_worker/src/arm.rs new file mode 100644 index 0000000..3a0c41e --- /dev/null +++ b/emulation_worker/src/arm.rs @@ -0,0 +1,85 @@ +use pyo3::types::PyDict; +use unicorn_engine::{RegisterARM, Unicorn}; + +pub fn initialize_arm_registers(emu: &mut Unicorn<()>, registerdump: 
&PyDict) { + emu.reg_write( + RegisterARM::R0, + registerdump.get_item("r0").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R1, + registerdump.get_item("r1").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R2, + registerdump.get_item("r2").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R3, + registerdump.get_item("r3").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R4, + registerdump.get_item("r4").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R5, + registerdump.get_item("r5").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R6, + registerdump.get_item("r6").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R7, + registerdump.get_item("r7").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R8, + registerdump.get_item("r8").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R9, + registerdump.get_item("r9").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R10, + registerdump.get_item("r10").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R11, + registerdump.get_item("r11").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R12, + registerdump.get_item("r12").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R13, + registerdump.get_item("r13").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R14, + registerdump.get_item("r14").unwrap().extract().unwrap(), + ) + .unwrap(); + emu.reg_write( + RegisterARM::R15, + registerdump.get_item("r15").unwrap().extract().unwrap(), + ) + .unwrap(); +} diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs new file mode 100644 index 0000000..b1b17c1 --- /dev/null +++ b/emulation_worker/src/hooks.rs @@ -0,0 
+1,105 @@ +use pyo3::types::{PyDict, PyList}; +use std::io; +use std::sync::Arc; +use unicorn_engine::unicorn_const::{HookType, MemType}; +use unicorn_engine::RegisterARM; +use unicorn_engine::Unicorn; + +use crate::{MemInfo, Logs}; + +fn hook_mem_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, _value: i64, logs: &Arc) -> bool { + let pc = uc.reg_read(RegisterARM::PC).unwrap(); + + let identifier = format!("{address}|{pc}"); + + let mut meminfo = logs.meminfo.write().expect("RwLock poisoned"); + + if meminfo.contains_key(&identifier) { + if let Some(mut element) = meminfo.get_mut(&identifier) { + element.counter += 1; + } + } else { + let last_tbid = *logs.last_tbid.read().unwrap(); + meminfo.insert(identifier, MemInfo{ + ins: address, + counter: 1, + direction: if mem_type == MemType::READ { 0 } else { 1 }, + address: pc, + tbid: last_tbid, + size + }); + } + + true +} + +fn hook_block_cb(_uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, logs: &Arc) { + // Save current tbid for meminfo logs + let mut last_tbid = logs.last_tbid.write().expect("RwLock poisoned"); + *last_tbid = address; +} + +fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, logs: &Arc) { + let mut endpoints = logs.endpoints.write().expect("RwLock poisoned"); + + let counter = endpoints.get_mut(&address).unwrap(); + if *counter > 1 { + *counter -= 1; + } else { + println!("Reached endpoint at {address:?}"); + uc.emu_stop() + .expect("failed terminating the emulation engine"); + } +} + +fn initialize_mem_hook(emu: &mut Unicorn<()>, logs_arc: &Arc) -> io::Result<()> { + let logs_arc = Arc::clone(&logs_arc); + let hook_mem_closure = move |uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, value: i64| -> bool { + hook_mem_cb(uc, mem_type, address, size, value, &logs_arc) + }; + emu.add_mem_hook(HookType::MEM_READ | HookType::MEM_WRITE, 0, u64::MAX, hook_mem_closure).expect("failed to add read mem hook"); + + Ok(()) +} + +fn 
initialize_block_hook(emu: &mut Unicorn<()>, logs_arc: &Arc) -> io::Result<()> { + let logs_arc = Arc::clone(&logs_arc); + let hook_block_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { + hook_block_cb(uc, address, size, &logs_arc); + }; + emu.add_block_hook(hook_block_closure).expect("failed to add block hook"); + + Ok(()) +} + +fn initialize_end_hook(emu: &mut Unicorn<()>, logs_arc: &Arc, config: &PyDict) -> io::Result<()> { + let config_endpoints: &PyList = config.get_item("end").unwrap().extract()?; + for obj in config_endpoints { + let end: &PyDict = obj.extract()?; + let address: u64 = end.get_item("address").unwrap().extract()?; + let counter: u32 = end.get_item("counter").unwrap().extract()?; + + let logs_arc = Arc::clone(&logs_arc); + + let mut endpoints = logs_arc.endpoints.write().expect("RwLock poisoned"); + endpoints.insert(address, counter); + drop(endpoints); + + let logs_arc = Arc::clone(&logs_arc); + + let end_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { + end_hook_cb(uc, address, size, &logs_arc); + }; + emu.add_code_hook(address, address, end_hook_closure).expect("failed to add block hook"); + } + + Ok(()) +} + +pub fn initialize_hooks(emu: &mut Unicorn<()>, logs_arc: &Arc, config: &PyDict) -> io::Result<()> { + initialize_mem_hook(emu, logs_arc)?; + initialize_block_hook(emu, logs_arc)?; + initialize_end_hook(emu, logs_arc, config)?; + + Ok(()) +} diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs new file mode 100644 index 0000000..ec48dbc --- /dev/null +++ b/emulation_worker/src/lib.rs @@ -0,0 +1,83 @@ +use priority_queue::PriorityQueue; +use pyo3::{ + prelude::*, + types::{PyDict, PyList}, +}; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use unicorn_engine::Unicorn; +use unicorn_engine::unicorn_const::{ + Arch, + Mode, + Permission, +}; + +mod arm; +use crate::arm::initialize_arm_registers; + +mod logs; +use crate::logs::{Logs, MemInfo}; + +mod hooks; 
+use crate::hooks::initialize_hooks; + +#[pyfunction] +fn run_unicorn(pregoldenrun_data: &PyDict, config: &PyDict) -> PyResult { + + println!("{:?}", config); + let mut unicorn = Unicorn::new(Arch::ARM, Mode::THUMB).expect("failed to initialize Unicorn instance"); + let emu = &mut unicorn; + + let memdumplist: &PyList = pregoldenrun_data + .get_item("memdumplist") + .unwrap() + .extract()?; + for obj in memdumplist.iter() { + let memdump: &PyDict = obj.extract()?; + let address: u64 = memdump.get_item("address").unwrap().extract()?; + let length: usize = memdump.get_item("len").unwrap().extract()?; + let dumps: &PyList = memdump.get_item("dumps").unwrap().extract()?; + let dump: Vec = dumps.get_item(0).unwrap().extract()?; + + // TODO: Use correct permissions + emu.mem_map(address, length, Permission::ALL) + .expect("failed to map code page"); + emu.mem_write(address, dump.as_slice()) + .expect("failed to write instructions"); + } + + let armregisters: &PyList = pregoldenrun_data + .get_item("armregisters") + .unwrap() + .extract()?; + let registerdump: &PyDict = armregisters.get_item(0).unwrap().extract()?; + + initialize_arm_registers(emu, registerdump); + + let logs = Logs { + meminfo: RwLock::new(HashMap::new()), + last_tbid: RwLock::new(0), + endpoints: RwLock::new(HashMap::new()) + }; + let logs_arc : Arc = Arc::new(logs); + + initialize_hooks(emu, &logs_arc, config).expect("failed initializing hooks"); + + let max_instruction_count: usize = config + .get_item("max_instruction_count") + .unwrap() + .extract()?; + let start: HashMap = config.get_item("start").unwrap().extract()?; + emu.emu_start(*start.get("address").unwrap()+1, 0, 0, max_instruction_count).expect("failed to emulate code"); + + let gil = Python::acquire_gil(); + let py = gil.python(); + Ok(logs_arc.to_object(py)) +} + +#[pymodule] +fn emulation_worker(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(run_unicorn, m)?)?; + Ok(()) +} diff --git 
a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs new file mode 100644 index 0000000..85f334c --- /dev/null +++ b/emulation_worker/src/logs.rs @@ -0,0 +1,50 @@ +use pyo3::{prelude::*, types::{PyDict, PyList}}; +use std::collections::HashMap; +use std::sync::RwLock; + +pub struct MemInfo { + pub ins: u64, + pub counter: u32, + pub direction: u8, + pub address: u64, + pub tbid: u64, + pub size: usize, +} + +impl ToPyObject for MemInfo { + fn to_object(&self, py: Python<'_>) -> PyObject { + let dict = PyDict::new(py); + dict.set_item("ins", self.ins).unwrap(); + dict.set_item("counter", self.counter).unwrap(); + dict.set_item("direction", self.direction).unwrap(); + dict.set_item("address", self.address).unwrap(); + dict.set_item("tbid", self.tbid).unwrap(); + dict.set_item("size", self.size).unwrap(); + + dict.to_object(py) + } +} + + +pub struct Logs { + pub meminfo: RwLock>, + + pub last_tbid: RwLock, + pub endpoints: RwLock> +} + +impl ToPyObject for Logs { + fn to_object(&self, py: Python<'_>) -> PyObject { + let dict = PyDict::new(py); + + let meminfo = self.meminfo.read().expect("RwLock poisoned"); + let meminfo_list = PyList::new(py, meminfo.values()); + + dict.set_item("meminfo", meminfo_list.to_object(py)) + .unwrap(); + drop(meminfo); + + dict.to_object(py) + } +} + diff --git a/faultclass.py b/faultclass.py index c26dcfb..7beb7b1 100644 --- a/faultclass.py +++ b/faultclass.py @@ -799,5 +799,21 @@ def python_worker_unicorn( goldenrun_data, change_nice=False, ): - run_unicorn(pregoldenrun_data, config_qemu) + t0 = time.time() + if change_nice: + os.nice(19) + + logs = run_unicorn(pregoldenrun_data, config_qemu) + logger.info(f"Ended qemu for exp {index}! Took {time.time() - t0}") + + logs["index"] = index + + queue_output.put(logs) + + logger.info( + "Python worker for experiment {} done. 
Took {}s".format( + index, time.time() - t0 + ) + ) + return From 6b3ea31129e76b6f2809190aaa048935fc7b39ad Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Wed, 8 Feb 2023 19:23:30 +0100 Subject: [PATCH 04/13] implement fault types --- emulation_worker/Cargo.lock | 77 ++++++++++++++++ emulation_worker/Cargo.toml | 1 + emulation_worker/src/hooks.rs | 164 ++++++++++++++++++++++++++++------ emulation_worker/src/lib.rs | 37 +++++--- emulation_worker/src/logs.rs | 71 ++++++++++++++- faultclass.py | 7 +- hdf5logger.py | 2 +- 7 files changed, 310 insertions(+), 49 deletions(-) diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index f260729..2f84546 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -39,6 +39,7 @@ dependencies = [ name = "emulation_worker" version = "0.1.0" dependencies = [ + "num", "pyo3", "unicorn-engine", ] @@ -74,6 +75,82 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] 
+ +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.17.0" diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml index cb3adc3..b072426 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.17.3", features = ["extension-module"] } unicorn-engine = "2.0.0" +num = "0.4.0" diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index b1b17c1..6ad1173 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -1,3 +1,4 @@ +use num::{BigUint, ToPrimitive}; use pyo3::types::{PyDict, PyList}; use std::io; use std::sync::Arc; @@ -5,21 +6,21 @@ use unicorn_engine::unicorn_const::{HookType, MemType}; use unicorn_engine::RegisterARM; use unicorn_engine::Unicorn; -use crate::{MemInfo, Logs}; +use crate::{Fault, FaultModel, FaultType, MemInfo, State}; -fn hook_mem_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, _value: i64, logs: &Arc) -> bool { +fn mem_hook_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, _value: i64, state: &Arc) -> bool { let pc = uc.reg_read(RegisterARM::PC).unwrap(); let identifier = 
format!("{address}|{pc}"); - let mut meminfo = logs.meminfo.write().expect("RwLock poisoned"); + let mut meminfo = state.logs.meminfo.write().expect("RwLock poisoned"); if meminfo.contains_key(&identifier) { if let Some(mut element) = meminfo.get_mut(&identifier) { element.counter += 1; } } else { - let last_tbid = *logs.last_tbid.read().unwrap(); + let last_tbid = *state.last_tbid.read().unwrap(); meminfo.insert(identifier, MemInfo{ ins: address, counter: 1, @@ -33,17 +34,21 @@ fn hook_mem_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: true } -fn hook_block_cb(_uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, logs: &Arc) { +fn block_hook_cb(_uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { // Save current tbid for meminfo logs - let mut last_tbid = logs.last_tbid.write().expect("RwLock poisoned"); + let mut last_tbid = state.last_tbid.write().expect("RwLock poisoned"); *last_tbid = address; } -fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, logs: &Arc) { - let mut endpoints = logs.endpoints.write().expect("RwLock poisoned"); +fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { + let mut endpoints = state.endpoints.write().expect("RwLock poisoned"); let counter = endpoints.get_mut(&address).unwrap(); if *counter > 1 { + println!( + "Decreasing endpoint counter for {:?} to {:?}", + address, *counter + ); *counter -= 1; } else { println!("Reached endpoint at {address:?}"); @@ -52,54 +57,155 @@ fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, logs: &Arc, logs_arc: &Arc) -> io::Result<()> { - let logs_arc = Arc::clone(&logs_arc); - let hook_mem_closure = move |uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, value: i64| -> bool { - hook_mem_cb(uc, mem_type, address, size, value, &logs_arc) - }; - emu.add_mem_hook(HookType::MEM_READ | HookType::MEM_WRITE, 0, u64::MAX, hook_mem_closure).expect("failed to add read mem hook"); +fn 
apply_model(data: &BigUint, fault: &Fault) -> BigUint { + let mask_big = BigUint::from_bytes_le(&fault.mask.to_le_bytes()); + match fault.model { + FaultModel::Set0 => data ^ (data & mask_big), + FaultModel::Set1 => data | mask_big, + FaultModel::Toggle => data & data, + FaultModel::Overwrite => mask_big + } +} + +fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { + let mut faults = state.faults.write().expect("RwLock poisoned"); + + let fault = faults.get_mut(&address).unwrap(); + if fault.trigger.hitcounter == 0 { + return; + } + fault.trigger.hitcounter -= 1; + if fault.trigger.hitcounter >= 1 { + return; + } + + println!("Reached fault trigger at {:?}", address); + + match fault.kind { + FaultType::Data | FaultType::Instruction => { + let fault_size = if matches!(fault.model, FaultModel::Overwrite) { + fault.num_bytes + } else { + 1 + }; + let data = BigUint::from_bytes_le( + uc.mem_read_as_vec(fault.address, fault_size as usize) + .unwrap() + .as_slice(), + ); + println!( + "Overwriting {:?} with {:?}", + data, + apply_model(&data, fault) + ); + uc.mem_write( + fault.address, + apply_model(&data, fault).to_bytes_le().as_slice(), + ) + .expect("failed writing fault data to memory"); + } + FaultType::Register => { + let register_value = BigUint::from( + uc.reg_read(fault.address as i32) + .expect("failed reading from register"), + ); + let new_value = apply_model(®ister_value, fault); + uc.reg_write(fault.address as i32, new_value.to_u64().unwrap()) + .expect("failed writing register fault"); + } + } +} + +fn initialize_mem_hook(emu: &mut Unicorn<()>, state_arc: &Arc) -> io::Result<()> { + let state_arc = Arc::clone(state_arc); + let mem_hook_closure = + move |uc: &mut Unicorn<'_, ()>, + mem_type: MemType, + address: u64, + size: usize, + value: i64| + -> bool { mem_hook_cb(uc, mem_type, address, size, value, &state_arc) }; + emu.add_mem_hook( + HookType::MEM_READ | HookType::MEM_WRITE, + 0, + u64::MAX, + mem_hook_closure, + ) + 
.expect("failed to add read mem hook"); Ok(()) } -fn initialize_block_hook(emu: &mut Unicorn<()>, logs_arc: &Arc) -> io::Result<()> { - let logs_arc = Arc::clone(&logs_arc); - let hook_block_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { - hook_block_cb(uc, address, size, &logs_arc); +fn initialize_block_hook(emu: &mut Unicorn<()>, state_arc: &Arc) -> io::Result<()> { + let state_arc = Arc::clone(state_arc); + let block_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { + block_hook_cb(uc, address, size, &state_arc); }; - emu.add_block_hook(hook_block_closure).expect("failed to add block hook"); + emu.add_block_hook(block_hook_closure) + .expect("failed to add block hook"); Ok(()) } -fn initialize_end_hook(emu: &mut Unicorn<()>, logs_arc: &Arc, config: &PyDict) -> io::Result<()> { +fn initialize_end_hook(emu: &mut Unicorn<()>, state_arc: &Arc, config: &PyDict) -> io::Result<()> { let config_endpoints: &PyList = config.get_item("end").unwrap().extract()?; for obj in config_endpoints { let end: &PyDict = obj.extract()?; let address: u64 = end.get_item("address").unwrap().extract()?; let counter: u32 = end.get_item("counter").unwrap().extract()?; - let logs_arc = Arc::clone(&logs_arc); + let state_arc = Arc::clone(state_arc); - let mut endpoints = logs_arc.endpoints.write().expect("RwLock poisoned"); + let mut endpoints = state_arc.endpoints.write().expect("RwLock poisoned"); endpoints.insert(address, counter); drop(endpoints); - let logs_arc = Arc::clone(&logs_arc); + let state_arc = Arc::clone(&state_arc); let end_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { - end_hook_cb(uc, address, size, &logs_arc); + end_hook_cb(uc, address, size, &state_arc); + }; + emu.add_code_hook(address, address, end_hook_closure) + .expect("failed to add end hook"); + } + + Ok(()) +} + +fn initialize_fault_hook(emu: &mut Unicorn<()>, state_arc: &Arc, faults: Vec, config: &PyDict) -> io::Result<()> { + for fault in 
faults { + let state_arc = Arc::clone(state_arc); + + let mut state_faults = state_arc.faults.write().expect("RwLock poisoned"); + state_faults.insert(fault.trigger.address, fault); + drop(state_faults); + + let state_arc = Arc::clone(&state_arc); + + let fault_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { + fault_hook_cb(uc, address, size, &state_arc); }; - emu.add_code_hook(address, address, end_hook_closure).expect("failed to add block hook"); + emu.add_code_hook( + fault.trigger.address, + fault.trigger.address, + fault_hook_closure, + ) + .expect("failed to add fault hook"); } Ok(()) } -pub fn initialize_hooks(emu: &mut Unicorn<()>, logs_arc: &Arc, config: &PyDict) -> io::Result<()> { - initialize_mem_hook(emu, logs_arc)?; - initialize_block_hook(emu, logs_arc)?; - initialize_end_hook(emu, logs_arc, config)?; +pub fn initialize_hooks( + emu: &mut Unicorn<()>, + state_arc: &Arc, + faults: Vec, + config: &PyDict, +) -> io::Result<()> { + initialize_mem_hook(emu, state_arc)?; + initialize_block_hook(emu, state_arc)?; + initialize_end_hook(emu, state_arc, config)?; + initialize_fault_hook(emu, state_arc, faults, config)?; Ok(()) } diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index ec48dbc..a0715b3 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -17,16 +17,20 @@ mod arm; use crate::arm::initialize_arm_registers; mod logs; -use crate::logs::{Logs, MemInfo}; +use crate::logs::{Fault, FaultModel, FaultType, Logs, MemInfo, State}; mod hooks; use crate::hooks::initialize_hooks; #[pyfunction] -fn run_unicorn(pregoldenrun_data: &PyDict, config: &PyDict) -> PyResult { - - println!("{:?}", config); - let mut unicorn = Unicorn::new(Arch::ARM, Mode::THUMB).expect("failed to initialize Unicorn instance"); +fn run_unicorn( + pregoldenrun_data: &PyDict, + faults: Vec, + config: &PyDict, +) -> PyResult { + println!("{config:?}"); + let mut unicorn = + Unicorn::new(Arch::ARM, 
Mode::THUMB).expect("failed to initialize Unicorn instance"); let emu = &mut unicorn; let memdumplist: &PyList = pregoldenrun_data @@ -57,23 +61,32 @@ fn run_unicorn(pregoldenrun_data: &PyDict, config: &PyDict) -> PyResult = Arc::new(logs); - initialize_hooks(emu, &logs_arc, config).expect("failed initializing hooks"); + let state_arc: Arc = Arc::new(state); + initialize_hooks(emu, &state_arc, faults, config).expect("failed initializing hooks"); let max_instruction_count: usize = config .get_item("max_instruction_count") .unwrap() .extract()?; let start: HashMap = config.get_item("start").unwrap().extract()?; - emu.emu_start(*start.get("address").unwrap()+1, 0, 0, max_instruction_count).expect("failed to emulate code"); + emu.emu_start( + *start.get("address").unwrap() + 1, + 0, + 0, + max_instruction_count, + ) + .expect("failed to emulate code"); - let gil = Python::acquire_gil(); - let py = gil.python(); - Ok(logs_arc.to_object(py)) + Python::with_gil(|py| Ok(state_arc.logs.to_object(py))) } #[pymodule] diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index 85f334c..ec8d1d1 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -1,4 +1,8 @@ -use pyo3::{prelude::*, types::{PyDict, PyList}}; +use pyo3::{ + exceptions, + prelude::*, + types::{PyDict, PyList}, +}; use std::collections::HashMap; use std::sync::RwLock; @@ -25,12 +29,64 @@ impl ToPyObject for MemInfo { } } +#[derive(Debug, Clone, Copy)] +pub enum FaultModel { + Set0, + Set1, + Toggle, + Overwrite, +} +impl<'a> FromPyObject<'a> for FaultModel { + fn extract(arg: &'a PyAny) -> PyResult { + let id: u8 = arg.extract().unwrap(); + match id { + 0 => Ok(FaultModel::Set0), + 1 => Ok(FaultModel::Set1), + 2 => Ok(FaultModel::Toggle), + 3 => Ok(FaultModel::Overwrite), + 4..=u8::MAX => Err(exceptions::PyValueError::new_err("unknown fault model")), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum FaultType { + Data, + Instruction, + Register, +} +impl<'a> 
FromPyObject<'a> for FaultType { + fn extract(arg: &'a PyAny) -> PyResult { + let id: u8 = arg.extract().unwrap(); + match id { + 0 => Ok(FaultType::Data), + 1 => Ok(FaultType::Instruction), + 2 => Ok(FaultType::Register), + 3..=u8::MAX => Err(exceptions::PyValueError::new_err("unknown fault type")), + } + } +} + +#[derive(FromPyObject, Debug, Clone, Copy)] +pub struct Trigger { + pub address: u64, + pub hitcounter: u32, +} + +#[derive(FromPyObject, Debug, Clone, Copy)] +pub struct Fault { + pub trigger: Trigger, + pub address: u64, + pub kind: FaultType, + pub model: FaultModel, + pub mask: u128, + pub lifespan: u32, + pub num_bytes: u32, + pub wildcard: bool, +} pub struct Logs { pub meminfo: RwLock>, - - pub last_tbid: RwLock, - pub endpoints: RwLock> } impl ToPyObject for Logs { @@ -48,3 +104,10 @@ impl ToPyObject for Logs { } } +pub struct State { + pub last_tbid: RwLock, + pub endpoints: RwLock>, + pub faults: RwLock>, + + pub logs: Logs, +} diff --git a/faultclass.py b/faultclass.py index 7beb7b1..843b749 100644 --- a/faultclass.py +++ b/faultclass.py @@ -43,7 +43,7 @@ def __init__( """ self.trigger = Trigger(trigger_address, trigger_hitcounter) self.address = fault_address - self.type = fault_type + self.kind = fault_type self.model = fault_model self.lifespan = fault_lifespan self.mask = fault_mask @@ -54,7 +54,7 @@ def write_to_fifo(self, fifo): out = "\n$$[Fault]\n" out = out + "% {:d} | {:d} | {:d} | {:d} | {:d} | {:d} | ".format( self.address, - self.type, + self.kind, self.model, self.lifespan, self.trigger.address, @@ -545,6 +545,7 @@ def readout_data( if callable(qemu_post): output = qemu_post(qemu_pre_data, output) + queue_output.put(output) max_ram_usage = gather_process_ram_usage(queue_ram_usage, max_ram_usage) @@ -803,7 +804,7 @@ def python_worker_unicorn( if change_nice: os.nice(19) - logs = run_unicorn(pregoldenrun_data, config_qemu) + logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu) logger.info(f"Ended qemu for exp {index}! 
Took {time.time() - t0}") logs["index"] = index diff --git a/hdf5logger.py b/hdf5logger.py index 688c5cd..5033a1d 100644 --- a/hdf5logger.py +++ b/hdf5logger.py @@ -224,7 +224,7 @@ def process_faults(f, group, faultlist, endpoint, end_reason, myfilter): faultrow["trigger_address"] = fault.trigger.address faultrow["trigger_hitcounter"] = fault.trigger.hitcounter faultrow["fault_address"] = fault.address - faultrow["fault_type"] = fault.type + faultrow["fault_type"] = fault.kind faultrow["fault_model"] = fault.model faultrow["fault_lifespan"] = fault.lifespan faultrow["fault_mask_upper"] = (fault.mask >> 64) & (pow(2, 64) - 1) From 67a5fce2f0b0238008eac8215d430be6b836462a Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 10 Feb 2023 16:35:54 +0100 Subject: [PATCH 05/13] implement fault lifetimes --- emulation_worker/Cargo.lock | 27 ++++++++ emulation_worker/Cargo.toml | 1 + emulation_worker/src/hooks.rs | 115 +++++++++++++++++++++++++++++++--- emulation_worker/src/lib.rs | 3 + emulation_worker/src/logs.rs | 7 ++- 5 files changed, 142 insertions(+), 11 deletions(-) diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index 2f84546..7fd1206 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -40,10 +40,27 @@ name = "emulation_worker" version = "0.1.0" dependencies = [ "num", + "priority-queue", "pyo3", "unicorn-engine", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "indexmap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "indoc" version = "1.0.8" @@ -186,6 +203,16 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +[[package]] +name = "priority-queue" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca9c6be70d989d21a136eb86c2d83e4b328447fac4a88dace2143c179c86267" +dependencies = [ + "autocfg", + "indexmap", +] + [[package]] name = "proc-macro2" version = "1.0.50" diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml index b072426..9eca8c0 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["cdylib"] pyo3 = { version = "0.17.3", features = ["extension-module"] } unicorn-engine = "2.0.0" num = "0.4.0" +priority-queue = "1.3.1" diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 6ad1173..2507775 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -13,7 +13,7 @@ fn mem_hook_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: let identifier = format!("{address}|{pc}"); - let mut meminfo = state.logs.meminfo.write().expect("RwLock poisoned"); + let mut meminfo = state.logs.meminfo.write().unwrap(); if meminfo.contains_key(&identifier) { if let Some(mut element) = meminfo.get_mut(&identifier) { @@ -34,14 +34,51 @@ fn mem_hook_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: true } -fn block_hook_cb(_uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { +fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { // Save current tbid for meminfo logs - let mut last_tbid = state.last_tbid.write().expect("RwLock poisoned"); + let mut last_tbid = state.last_tbid.write().unwrap(); *last_tbid = address; + + let live_faults = state.live_faults.read().unwrap(); + let mut single_step_hook_handle = state.single_step_hook_handle.write().unwrap(); + + if live_faults.len() == 0 && single_step_hook_handle.is_some() { + uc.remove_hook(single_step_hook_handle.unwrap()) + 
.expect("failed removing single step hook"); + *single_step_hook_handle = None; + } + + if single_step_hook_handle.is_some() { + return; + } + + let faults = state.faults.read().unwrap(); + + for fault in faults.values() { + if fault.lifespan == 0 { + continue; + } + + if fault.trigger.hitcounter == 1 + && fault.trigger.address >= address + && fault.trigger.address <= (address + size as u64) + { + let state_arc = Arc::clone(state); + let single_step_hook_closure = + move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { + single_step_hook_cb(uc, address, size, &state_arc); + }; + *single_step_hook_handle = Some( + uc.add_code_hook(u64::MIN, u64::MAX, single_step_hook_closure) + .unwrap(), + ); + } + } + } fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { - let mut endpoints = state.endpoints.write().expect("RwLock poisoned"); + let mut endpoints = state.endpoints.write().unwrap(); let counter = endpoints.get_mut(&address).unwrap(); if *counter > 1 { @@ -57,6 +94,47 @@ fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc, state: &Arc) { + { + let live_faults = state.live_faults.read().unwrap(); + + if live_faults.len() == 0 { + return; + } + + let ((_, _), priority) = live_faults.peek().unwrap(); + let lifespan = u64::MAX - priority; + let instruction_count = *state.instruction_count.read().unwrap(); + + if lifespan > instruction_count { + return; + } + } + + let mut live_faults = state.live_faults.write().unwrap(); + let ((address, prefault_data), _) = live_faults.pop().unwrap(); + + let faults = state.faults.read().unwrap(); + let fault = faults.get(&address).unwrap(); + + println!("Undoing fault"); + match fault.kind { + FaultType::Register => { + uc.reg_write(fault.address as i32, prefault_data.to_u64().unwrap()).expect("failed restoring register value"); + } + FaultType::Data | FaultType::Instruction => { + uc.mem_write(fault.address, prefault_data.to_bytes_le().as_slice()).expect("failed restoring 
memory value"); + } + } +} + +fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, _address: u64, _size: u32, state: &Arc) { + undo_faults(uc, state); + + let mut instruction_count = state.instruction_count.write().unwrap(); + *instruction_count = *instruction_count + 1; +} + fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { let mask_big = BigUint::from_bytes_le(&fault.mask.to_le_bytes()); match fault.model { @@ -68,7 +146,7 @@ fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { } fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { - let mut faults = state.faults.write().expect("RwLock poisoned"); + let mut faults = state.faults.write().unwrap(); let fault = faults.get_mut(&address).unwrap(); if fault.trigger.hitcounter == 0 { @@ -79,7 +157,9 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc return; } - println!("Reached fault trigger at {:?}", address); + println!("Executing fault at {address:?}"); + + let prefault_data; match fault.kind { FaultType::Data | FaultType::Instruction => { @@ -93,6 +173,7 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc .unwrap() .as_slice(), ); + prefault_data = data.clone(); println!( "Overwriting {:?} with {:?}", data, @@ -109,11 +190,21 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc uc.reg_read(fault.address as i32) .expect("failed reading from register"), ); + prefault_data = register_value.clone(); let new_value = apply_model(®ister_value, fault); uc.reg_write(fault.address as i32, new_value.to_u64().unwrap()) .expect("failed writing register fault"); } } + + if fault.lifespan != 0 { + let mut live_faults = state.live_faults.write().unwrap(); + let instruction_count = *state.instruction_count.read().unwrap(); + live_faults.push( + (fault.trigger.address, prefault_data), + u64::MAX - fault.lifespan as u64 + instruction_count, + ); + } } fn initialize_mem_hook(emu: &mut Unicorn<()>, 
state_arc: &Arc) -> io::Result<()> { @@ -156,7 +247,7 @@ fn initialize_end_hook(emu: &mut Unicorn<()>, state_arc: &Arc, config: &P let state_arc = Arc::clone(state_arc); - let mut endpoints = state_arc.endpoints.write().expect("RwLock poisoned"); + let mut endpoints = state_arc.endpoints.write().unwrap(); endpoints.insert(address, counter); drop(endpoints); @@ -172,11 +263,15 @@ fn initialize_end_hook(emu: &mut Unicorn<()>, state_arc: &Arc, config: &P Ok(()) } -fn initialize_fault_hook(emu: &mut Unicorn<()>, state_arc: &Arc, faults: Vec, config: &PyDict) -> io::Result<()> { +fn initialize_fault_hook( + emu: &mut Unicorn<()>, + state_arc: &Arc, + faults: Vec, +) -> io::Result<()> { for fault in faults { let state_arc = Arc::clone(state_arc); - let mut state_faults = state_arc.faults.write().expect("RwLock poisoned"); + let mut state_faults = state_arc.faults.write().unwrap(); state_faults.insert(fault.trigger.address, fault); drop(state_faults); @@ -205,7 +300,7 @@ pub fn initialize_hooks( initialize_mem_hook(emu, state_arc)?; initialize_block_hook(emu, state_arc)?; initialize_end_hook(emu, state_arc, config)?; - initialize_fault_hook(emu, state_arc, faults, config)?; + initialize_fault_hook(emu, state_arc, faults)?; Ok(()) } diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index a0715b3..0beb109 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -67,6 +67,9 @@ fn run_unicorn( last_tbid: RwLock::new(0), endpoints: RwLock::new(HashMap::new()), faults: RwLock::new(HashMap::new()), + live_faults: RwLock::new(PriorityQueue::new()), + instruction_count: RwLock::new(0), + single_step_hook_handle: RwLock::new(None), logs }; diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index ec8d1d1..b4adbb1 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -1,10 +1,12 @@ +use num::BigUint; +use priority_queue::PriorityQueue; use pyo3::{ exceptions, prelude::*, types::{PyDict, PyList}, 
}; -use std::collections::HashMap; use std::sync::RwLock; +use std::{collections::HashMap, ffi::c_void}; pub struct MemInfo { pub ins: u64, @@ -108,6 +110,9 @@ pub struct State { pub last_tbid: RwLock, pub endpoints: RwLock>, pub faults: RwLock>, + pub live_faults: RwLock>, + pub instruction_count: RwLock, + pub single_step_hook_handle: RwLock>, pub logs: Logs, } From dec8bed8a9f16ee2ef4e533eb8212415d11f30e0 Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 10 Feb 2023 17:12:51 +0100 Subject: [PATCH 06/13] add endpoint logging --- emulation_worker/src/hooks.rs | 21 +++++++++++++++++---- emulation_worker/src/lib.rs | 1 + emulation_worker/src/logs.rs | 13 ++++++++++++- faultclass.py | 1 + 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 2507775..065ba37 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -77,7 +77,13 @@ fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc< } -fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { +fn end_hook_cb( + uc: &mut Unicorn<'_, ()>, + address: u64, + _size: u32, + state: &Arc, + first_endpoint: u64, +) { let mut endpoints = state.endpoints.write().unwrap(); let counter = endpoints.get_mut(&address).unwrap(); @@ -88,6 +94,8 @@ fn end_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc, state_arc: &Arc) -> io::R Ok(()) } -fn initialize_end_hook(emu: &mut Unicorn<()>, state_arc: &Arc, config: &PyDict) -> io::Result<()> { +fn initialize_end_hook( + emu: &mut Unicorn<()>, + state_arc: &Arc, + config: &PyDict, + first_endpoint: u64, +) -> io::Result<()> { let config_endpoints: &PyList = config.get_item("end").unwrap().extract()?; for obj in config_endpoints { let end: &PyDict = obj.extract()?; @@ -254,7 +267,7 @@ fn initialize_end_hook(emu: &mut Unicorn<()>, state_arc: &Arc, config: &P let state_arc = Arc::clone(&state_arc); let 
end_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { - end_hook_cb(uc, address, size, &state_arc); + end_hook_cb(uc, address, size, &state_arc, first_endpoint); }; emu.add_code_hook(address, address, end_hook_closure) .expect("failed to add end hook"); @@ -299,7 +312,7 @@ pub fn initialize_hooks( ) -> io::Result<()> { initialize_mem_hook(emu, state_arc)?; initialize_block_hook(emu, state_arc)?; - initialize_end_hook(emu, state_arc, config)?; + initialize_end_hook(emu, state_arc, config, faults[0].trigger.address)?; initialize_fault_hook(emu, state_arc, faults)?; Ok(()) diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index 0beb109..3f5da6b 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -61,6 +61,7 @@ fn run_unicorn( let logs = Logs { meminfo: RwLock::new(HashMap::new()), + endpoint: RwLock::new((false, 0, 0)), }; let state = State { diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index b4adbb1..e2d38ad 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -89,17 +89,28 @@ pub struct Fault { pub struct Logs { pub meminfo: RwLock>, + pub endpoint: RwLock<(bool, u64, u32)> } impl ToPyObject for Logs { fn to_object(&self, py: Python<'_>) -> PyObject { let dict = PyDict::new(py); - let meminfo = self.meminfo.read().expect("RwLock poisoned"); + let meminfo = self.meminfo.read().unwrap(); let meminfo_list = PyList::new(py, meminfo.values()); dict.set_item("meminfo", meminfo_list.to_object(py)) .unwrap(); + + let endpoint = self.endpoint.read().unwrap(); + if endpoint.2 == 1 { + dict.set_item("end_reason", format!("{}/1", endpoint.1)) + .unwrap(); + } else { + dict.set_item("end_reason", "max tb").unwrap(); + } + dict.set_item("endpoint", if endpoint.0 { 1 } else { 0 }) + .unwrap(); drop(meminfo); dict.to_object(py) diff --git a/faultclass.py b/faultclass.py index 843b749..8c608ea 100644 --- a/faultclass.py +++ b/faultclass.py @@ -808,6 +808,7 @@ 
def python_worker_unicorn( logger.info(f"Ended qemu for exp {index}! Took {time.time() - t0}") logs["index"] = index + logs["faultlist"] = fault_list queue_output.put(logs) From 0200d9f70474aba8b1c6c69f65701df67de3c62f Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Sat, 11 Feb 2023 17:21:41 +0100 Subject: [PATCH 07/13] add tbinfo logs --- emulation_worker/Cargo.lock | 21 ++++++ emulation_worker/Cargo.toml | 1 + emulation_worker/src/hooks.rs | 115 ++++++++++++----------------- emulation_worker/src/hooks/util.rs | 79 ++++++++++++++++++++ emulation_worker/src/lib.rs | 9 ++- emulation_worker/src/logs.rs | 32 +++++++- 6 files changed, 185 insertions(+), 72 deletions(-) create mode 100644 emulation_worker/src/hooks/util.rs diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index 7fd1206..15d0e00 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -14,6 +14,26 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "capstone" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1097e608594dad3bad608295567f757742b883606fe150faf7a9740b849730d8" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7f651d5ec4c2a2e6c508f2c8032655003cd728ec85663e9796616990e25b5a" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "cc" version = "1.0.78" @@ -39,6 +59,7 @@ dependencies = [ name = "emulation_worker" version = "0.1.0" dependencies = [ + "capstone", "num", "priority-queue", "pyo3", diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml index 9eca8c0..4935506 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -13,3 +13,4 @@ pyo3 = { version = "0.17.3", features = 
["extension-module"] } unicorn-engine = "2.0.0" num = "0.4.0" priority-queue = "1.3.1" +capstone = "0.11.0" diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 065ba37..4c5dbfd 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -3,36 +3,12 @@ use pyo3::types::{PyDict, PyList}; use std::io; use std::sync::Arc; use unicorn_engine::unicorn_const::{HookType, MemType}; -use unicorn_engine::RegisterARM; use unicorn_engine::Unicorn; use crate::{Fault, FaultModel, FaultType, MemInfo, State}; -fn mem_hook_cb(uc: &mut Unicorn<'_, ()>, mem_type: MemType, address: u64, size: usize, _value: i64, state: &Arc) -> bool { - let pc = uc.reg_read(RegisterARM::PC).unwrap(); - - let identifier = format!("{address}|{pc}"); - - let mut meminfo = state.logs.meminfo.write().unwrap(); - - if meminfo.contains_key(&identifier) { - if let Some(mut element) = meminfo.get_mut(&identifier) { - element.counter += 1; - } - } else { - let last_tbid = *state.last_tbid.read().unwrap(); - meminfo.insert(identifier, MemInfo{ - ins: address, - counter: 1, - direction: if mem_type == MemType::READ { 0 } else { 1 }, - address: pc, - tbid: last_tbid, - size - }); - } - - true -} +mod util; +use util::{log_tb_info, apply_model, undo_faults}; fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { // Save current tbid for meminfo logs @@ -75,6 +51,8 @@ fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc< } } + let tbinfo = state.logs.tbinfo.write().unwrap(); + log_tb_info(uc, address, size, &state.cs_engine, tbinfo); } fn end_hook_cb( @@ -102,55 +80,54 @@ fn end_hook_cb( } } -fn undo_faults(uc: &mut Unicorn<'_, ()>, state: &Arc) { - { - let live_faults = state.live_faults.read().unwrap(); - - if live_faults.len() == 0 { - return; - } - - let ((_, _), priority) = live_faults.peek().unwrap(); - let lifespan = u64::MAX - priority; - let instruction_count = *state.instruction_count.read().unwrap(); - 
- if lifespan > instruction_count { - return; - } - } - - let mut live_faults = state.live_faults.write().unwrap(); - let ((address, prefault_data), _) = live_faults.pop().unwrap(); - - let faults = state.faults.read().unwrap(); - let fault = faults.get(&address).unwrap(); - - println!("Undoing fault"); - match fault.kind { - FaultType::Register => { - uc.reg_write(fault.address as i32, prefault_data.to_u64().unwrap()).expect("failed restoring register value"); - } - FaultType::Data | FaultType::Instruction => { - uc.mem_write(fault.address, prefault_data.to_bytes_le().as_slice()).expect("failed restoring memory value"); - } +fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { + let mut instruction_count = state.instruction_count.write().unwrap(); + undo_faults( + uc, + *instruction_count, + state.faults.read().unwrap(), + state.live_faults.write().unwrap(), + ); + *instruction_count += 1; + + let mut tbinfo = state.logs.tbinfo.write().unwrap(); + if let Some(tbinfo) = tbinfo.get_mut(&(address, size as usize)) { + tbinfo.num_exec += 1; + } else { + log_tb_info(uc, address, size, &state.cs_engine, tbinfo); } } -fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, _address: u64, _size: u32, state: &Arc) { - undo_faults(uc, state); +fn mem_hook_cb( + uc: &mut Unicorn<'_, ()>, + mem_type: MemType, + address: u64, + size: usize, + _value: i64, + state: &Arc, +) -> bool { + let pc = uc.pc_read().unwrap(); - let mut instruction_count = state.instruction_count.write().unwrap(); - *instruction_count = *instruction_count + 1; -} + let mut meminfo = state.logs.meminfo.write().unwrap(); -fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { - let mask_big = BigUint::from_bytes_le(&fault.mask.to_le_bytes()); - match fault.model { - FaultModel::Set0 => data ^ (data & mask_big), - FaultModel::Set1 => data | mask_big, - FaultModel::Toggle => data & data, - FaultModel::Overwrite => mask_big + if let Some(mut element) = 
meminfo.get_mut(&(address, pc)) { + element.counter += 1; + } else { + let last_tbid = *state.last_tbid.read().unwrap(); + meminfo.insert( + (address, pc), + MemInfo { + ins: address, + counter: 1, + direction: if mem_type == MemType::READ { 0 } else { 1 }, + address: pc, + tbid: last_tbid, + size, + }, + ); } + + true } fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc) { diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs new file mode 100644 index 0000000..e4adcaa --- /dev/null +++ b/emulation_worker/src/hooks/util.rs @@ -0,0 +1,79 @@ +use std::collections::HashMap; +use priority_queue::PriorityQueue; +use unicorn_engine::Unicorn; +use std::sync::{RwLockReadGuard, RwLockWriteGuard}; +use crate::logs::TbInfoBlock; +use num::{ToPrimitive, BigUint}; +use crate::{Fault, FaultModel, FaultType}; + +pub fn log_tb_info( + uc: &mut Unicorn<'_, ()>, + address: u64, + size: u32, + cs: &capstone::Capstone, + mut tbinfo: RwLockWriteGuard>, +) { + if let Some(tbinfo) = tbinfo.get_mut(&(address, size as usize)) { + tbinfo.num_exec += 1; + } else { + let code = uc.mem_read_as_vec(address, size as usize).unwrap(); + let assembler = cs.disasm_all(code.as_slice(), address).unwrap().to_string(); + tbinfo.insert( + (address, size as usize), + TbInfoBlock { + id: address, + size, + ins_count: assembler.matches('\n').count() as u32, + num_exec: 1, + assembler, + }, + ); + } +} + +pub fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { + let mask_big = BigUint::from_bytes_le(&fault.mask.to_le_bytes()); + match fault.model { + FaultModel::Set0 => data ^ (data & mask_big), + FaultModel::Set1 => data | mask_big, + FaultModel::Toggle => { + let mask = (BigUint::from(1u32) << data.bits()) - BigUint::from(1u32); + data ^ mask + } + FaultModel::Overwrite => mask_big, + } +} + +pub fn undo_faults( + uc: &mut Unicorn<'_, ()>, + instruction_count: u64, + faults: RwLockReadGuard>, + mut live_faults: RwLockWriteGuard>, +) { + 
if live_faults.len() == 0 { + return; + } + + let ((_, _), priority) = live_faults.peek().unwrap(); + let lifespan = u64::MAX - priority; + + if lifespan > instruction_count { + return; + } + + let ((address, prefault_data), _) = live_faults.pop().unwrap(); + + let fault = faults.get(&address).unwrap(); + + println!("Undoing fault"); + match fault.kind { + FaultType::Register => { + uc.reg_write(fault.address as i32, prefault_data.to_u64().unwrap()) + .expect("failed restoring register value"); + } + FaultType::Data | FaultType::Instruction => { + uc.mem_write(fault.address, prefault_data.to_bytes_le().as_slice()) + .expect("failed restoring memory value"); + } + } +} diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index 3f5da6b..ed6028e 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -1,3 +1,4 @@ +use capstone::{prelude::BuildsCapstone, Capstone}; use priority_queue::PriorityQueue; use pyo3::{ prelude::*, @@ -62,6 +63,7 @@ fn run_unicorn( let logs = Logs { meminfo: RwLock::new(HashMap::new()), endpoint: RwLock::new((false, 0, 0)), + tbinfo: RwLock::new(HashMap::new()) }; let state = State { @@ -71,7 +73,12 @@ fn run_unicorn( live_faults: RwLock::new(PriorityQueue::new()), instruction_count: RwLock::new(0), single_step_hook_handle: RwLock::new(None), - logs + cs_engine: Capstone::new() + .arm() + .mode(capstone::arch::arm::ArchMode::Thumb) + .build() + .unwrap(), + logs, }; let state_arc: Arc = Arc::new(state); diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index e2d38ad..9dc7b07 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -1,3 +1,4 @@ +use capstone::Capstone; use num::BigUint; use priority_queue::PriorityQueue; use pyo3::{ @@ -87,9 +88,31 @@ pub struct Fault { pub wildcard: bool, } +pub struct TbInfoBlock { + pub id: u64, + pub size: u32, + pub ins_count: u32, + pub num_exec: u32, + pub assembler: String, +} + +impl ToPyObject for TbInfoBlock { + fn 
to_object(&self, py: Python<'_>) -> PyObject { + let dict = PyDict::new(py); + dict.set_item("id", self.id).unwrap(); + dict.set_item("size", self.size).unwrap(); + dict.set_item("ins_count", self.ins_count).unwrap(); + dict.set_item("num_exec", self.num_exec).unwrap(); + dict.set_item("assembler", self.assembler.clone()).unwrap(); + + dict.to_object(py) + } +} + pub struct Logs { - pub meminfo: RwLock>, - pub endpoint: RwLock<(bool, u64, u32)> + pub meminfo: RwLock>, + pub endpoint: RwLock<(bool, u64, u32)>, + pub tbinfo: RwLock> } impl ToPyObject for Logs { @@ -102,6 +125,10 @@ impl ToPyObject for Logs { dict.set_item("meminfo", meminfo_list.to_object(py)) .unwrap(); + let tbinfo = self.tbinfo.read().unwrap(); + let tbinfo_list = PyList::new(py, tbinfo.values()); + dict.set_item("tbinfo", tbinfo_list.to_object(py)).unwrap(); + let endpoint = self.endpoint.read().unwrap(); if endpoint.2 == 1 { dict.set_item("end_reason", format!("{}/1", endpoint.1)) @@ -124,6 +151,7 @@ pub struct State { pub live_faults: RwLock>, pub instruction_count: RwLock, pub single_step_hook_handle: RwLock>, + pub cs_engine: Capstone, pub logs: Logs, } From 8198c5263539c5b76e1b42403cc6a3f69d22147b Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Sat, 11 Feb 2023 19:12:11 +0100 Subject: [PATCH 08/13] add tbexec logs --- emulation_worker/src/hooks.rs | 14 +++++++------- emulation_worker/src/hooks/util.rs | 18 +++++++++++++----- emulation_worker/src/lib.rs | 3 ++- emulation_worker/src/logs.rs | 22 +++++++++++++++++++++- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 4c5dbfd..0317d65 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -8,7 +8,7 @@ use unicorn_engine::Unicorn; use crate::{Fault, FaultModel, FaultType, MemInfo, State}; mod util; -use util::{log_tb_info, apply_model, undo_faults}; +use util::{apply_model, log_tb_exec, log_tb_info, undo_faults}; fn block_hook_cb(uc: 
&mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { // Save current tbid for meminfo logs @@ -53,6 +53,8 @@ fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc< let tbinfo = state.logs.tbinfo.write().unwrap(); log_tb_info(uc, address, size, &state.cs_engine, tbinfo); + let tbexec = state.logs.tbexec.write().unwrap(); + log_tb_exec(address, tbexec); } fn end_hook_cb( @@ -90,12 +92,10 @@ fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: ); *instruction_count += 1; - let mut tbinfo = state.logs.tbinfo.write().unwrap(); - if let Some(tbinfo) = tbinfo.get_mut(&(address, size as usize)) { - tbinfo.num_exec += 1; - } else { - log_tb_info(uc, address, size, &state.cs_engine, tbinfo); - } + let tbinfo = state.logs.tbinfo.write().unwrap(); + log_tb_info(uc, address, size, &state.cs_engine, tbinfo); + let tbexec = state.logs.tbexec.write().unwrap(); + log_tb_exec(address, tbexec); } fn mem_hook_cb( diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index e4adcaa..8b148ce 100644 --- a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -1,10 +1,10 @@ -use std::collections::HashMap; +use crate::logs::{TbExecEntry, TbInfoBlock}; +use crate::{Fault, FaultModel, FaultType}; +use num::{BigUint, ToPrimitive}; use priority_queue::PriorityQueue; -use unicorn_engine::Unicorn; +use std::collections::HashMap; use std::sync::{RwLockReadGuard, RwLockWriteGuard}; -use crate::logs::TbInfoBlock; -use num::{ToPrimitive, BigUint}; -use crate::{Fault, FaultModel, FaultType}; +use unicorn_engine::Unicorn; pub fn log_tb_info( uc: &mut Unicorn<'_, ()>, @@ -31,6 +31,14 @@ pub fn log_tb_info( } } +pub fn log_tb_exec(address: u64, mut tbexec: RwLockWriteGuard>) { + let tbexec_len = tbexec.len(); + tbexec.push(TbExecEntry { + pos: tbexec_len as u64, + tb: address, + }); +} + pub fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { let mask_big = 
BigUint::from_bytes_le(&fault.mask.to_le_bytes()); match fault.model { diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index ed6028e..2d3ee49 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -63,7 +63,8 @@ fn run_unicorn( let logs = Logs { meminfo: RwLock::new(HashMap::new()), endpoint: RwLock::new((false, 0, 0)), - tbinfo: RwLock::new(HashMap::new()) + tbinfo: RwLock::new(HashMap::new()), + tbexec: RwLock::new(Vec::new()), }; let state = State { diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index 9dc7b07..c13a44b 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -109,10 +109,26 @@ impl ToPyObject for TbInfoBlock { } } +pub struct TbExecEntry { + pub tb: u64, + pub pos: u64, +} + +impl ToPyObject for TbExecEntry { + fn to_object(&self, py: Python<'_>) -> PyObject { + let dict = PyDict::new(py); + dict.set_item("tb", self.tb).unwrap(); + dict.set_item("pos", self.pos).unwrap(); + + dict.to_object(py) + } +} + pub struct Logs { pub meminfo: RwLock>, pub endpoint: RwLock<(bool, u64, u32)>, - pub tbinfo: RwLock> + pub tbinfo: RwLock>, + pub tbexec: RwLock>, } impl ToPyObject for Logs { @@ -129,6 +145,10 @@ impl ToPyObject for Logs { let tbinfo_list = PyList::new(py, tbinfo.values()); dict.set_item("tbinfo", tbinfo_list.to_object(py)).unwrap(); + let tbexec = self.tbexec.read().unwrap(); + let tbexec_list = PyList::new(py, tbexec.as_slice()); + dict.set_item("tbexec", tbexec_list.to_object(py)).unwrap(); + let endpoint = self.endpoint.read().unwrap(); if endpoint.2 == 1 { dict.set_item("end_reason", format!("{}/1", endpoint.1)) From 2c1892e3373035ccd399f626589801e7732eb3a8 Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Sat, 18 Feb 2023 16:24:45 +0100 Subject: [PATCH 09/13] filter tbexec/tbinfo entries --- emulation_worker/src/hooks.rs | 11 ++++++++++- emulation_worker/src/hooks/util.rs | 25 ++++++++++++++++++------- emulation_worker/src/lib.rs | 2 +- 
emulation_worker/src/logs.rs | 3 ++- faultclass.py | 11 +++++++++++ 5 files changed, 42 insertions(+), 10 deletions(-) diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 0317d65..c23d24f 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -48,6 +48,7 @@ fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc< uc.add_code_hook(u64::MIN, u64::MAX, single_step_hook_closure) .unwrap(), ); + return; } } @@ -60,7 +61,7 @@ fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc< fn end_hook_cb( uc: &mut Unicorn<'_, ()>, address: u64, - _size: u32, + size: u32, state: &Arc, first_endpoint: u64, ) { @@ -77,6 +78,14 @@ fn end_hook_cb( let mut endpoint = state.logs.endpoint.write().unwrap(); *endpoint = (address == first_endpoint, address, 1); println!("Reached endpoint at {address:?}"); + + // Since this hook has been registered before the single step hook we need to call it + // manullay to log the last instruction, since the callback would not be called otherwise + let single_step_hook_handle = state.single_step_hook_handle.read().unwrap(); + if single_step_hook_handle.is_some() { + single_step_hook_cb(uc, address, size, state); + } + uc.emu_stop() .expect("failed terminating the emulation engine"); } diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index 8b148ce..8c6d4c5 100644 --- a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -17,13 +17,27 @@ pub fn log_tb_info( tbinfo.num_exec += 1; } else { let code = uc.mem_read_as_vec(address, size as usize).unwrap(); - let assembler = cs.disasm_all(code.as_slice(), address).unwrap().to_string(); + + // Formatting may look a bit weird, but this needs to conform to QEMU's disassembly output + let mut assembler = String::from(" "); + let instructions = cs.disasm_all(code.as_slice(), address).unwrap(); + for insn in instructions.iter() { + assembler += format!( 
+ "[ {:x} ]: {} {} \n", + insn.address(), + insn.mnemonic().unwrap(), + insn.op_str().unwrap() + ) + .as_str(); + } + assembler += " \n"; + tbinfo.insert( (address, size as usize), TbInfoBlock { id: address, size, - ins_count: assembler.matches('\n').count() as u32, + ins_count: assembler.matches('\n').count() as u32 - 1, num_exec: 1, assembler, }, @@ -32,11 +46,8 @@ pub fn log_tb_info( } pub fn log_tb_exec(address: u64, mut tbexec: RwLockWriteGuard>) { - let tbexec_len = tbexec.len(); - tbexec.push(TbExecEntry { - pos: tbexec_len as u64, - tb: address, - }); + let pos: u64 = tbexec.len() as u64 - 1; + tbexec.push(TbExecEntry { pos, tb: address }); } pub fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index 2d3ee49..46627fe 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -96,7 +96,7 @@ fn run_unicorn( 0, max_instruction_count, ) - .expect("failed to emulate code"); + .unwrap_or_else(|_| panic!("failed to emulate code. 
PC: {}", emu.pc_read().unwrap())); Python::with_gil(|py| Ok(state_arc.logs.to_object(py))) } diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/logs.rs index c13a44b..a629a22 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/logs.rs @@ -145,7 +145,8 @@ impl ToPyObject for Logs { let tbinfo_list = PyList::new(py, tbinfo.values()); dict.set_item("tbinfo", tbinfo_list.to_object(py)).unwrap(); - let tbexec = self.tbexec.read().unwrap(); + let mut tbexec = self.tbexec.write().unwrap(); + tbexec.remove(0); let tbexec_list = PyList::new(py, tbexec.as_slice()); dict.set_item("tbexec", tbexec_list.to_object(py)).unwrap(); diff --git a/faultclass.py b/faultclass.py index 8c608ea..02ff640 100644 --- a/faultclass.py +++ b/faultclass.py @@ -810,6 +810,17 @@ def python_worker_unicorn( logs["index"] = index logs["faultlist"] = fault_list + pdtbexeclist = pd.DataFrame(logs["tbexec"]) + [pdtbexeclist, tblist] = filter_tb( + pdtbexeclist, + logs["tbinfo"], + goldenrun_data["tbexec"], + goldenrun_data["tbinfo"], + index, + ) + logs["tbexec"] = write_output_wrt_goldenrun("tbexec", pdtbexeclist, goldenrun_data) + logs["tbinfo"] = write_output_wrt_goldenrun("tbinfo", tblist, goldenrun_data) + queue_output.put(logs) logger.info( From 8cd5b1162e2ef02aba403cdfc78256c055a2f563 Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Sun, 19 Feb 2023 18:03:53 +0100 Subject: [PATCH 10/13] add memory and arm/riscv register dumps --- emulation_worker/src/architecture.rs | 147 +++++++++++++++++++ emulation_worker/src/arm.rs | 85 ----------- emulation_worker/src/hooks.rs | 93 +++++++++--- emulation_worker/src/hooks/util.rs | 41 +++++- emulation_worker/src/lib.rs | 105 ++++++++----- emulation_worker/src/{logs.rs => structs.rs} | 32 +++- faultclass.py | 17 ++- 7 files changed, 366 insertions(+), 154 deletions(-) create mode 100644 emulation_worker/src/architecture.rs delete mode 100644 emulation_worker/src/arm.rs rename emulation_worker/src/{logs.rs => structs.rs} (82%) 
diff --git a/emulation_worker/src/architecture.rs b/emulation_worker/src/architecture.rs new file mode 100644 index 0000000..93f01da --- /dev/null +++ b/emulation_worker/src/architecture.rs @@ -0,0 +1,147 @@ +use pyo3::types::PyDict; +use std::collections::HashMap; +use std::sync::RwLockWriteGuard; +use unicorn_engine::{ + unicorn_const::{Arch, Mode}, + RegisterARM, RegisterRISCV, Unicorn, +}; + +static RISCV_REGISTERS: &[(&str, u8)] = &[ + ("pc", RegisterRISCV::PC as u8), + ("x0", RegisterRISCV::X0 as u8), + ("x1", RegisterRISCV::X1 as u8), + ("x2", RegisterRISCV::X2 as u8), + ("x3", RegisterRISCV::X3 as u8), + ("x4", RegisterRISCV::X4 as u8), + ("x5", RegisterRISCV::X5 as u8), + ("x6", RegisterRISCV::X6 as u8), + ("x7", RegisterRISCV::X7 as u8), + ("x8", RegisterRISCV::X8 as u8), + ("x9", RegisterRISCV::X9 as u8), + ("x10", RegisterRISCV::X10 as u8), + ("x11", RegisterRISCV::X11 as u8), + ("x12", RegisterRISCV::X12 as u8), + ("x13", RegisterRISCV::X13 as u8), + ("x14", RegisterRISCV::X14 as u8), + ("x15", RegisterRISCV::X15 as u8), + ("x16", RegisterRISCV::X16 as u8), + ("x17", RegisterRISCV::X17 as u8), + ("x18", RegisterRISCV::X18 as u8), + ("x19", RegisterRISCV::X19 as u8), + ("x20", RegisterRISCV::X20 as u8), + ("x21", RegisterRISCV::X21 as u8), + ("x22", RegisterRISCV::X22 as u8), + ("x23", RegisterRISCV::X23 as u8), + ("x24", RegisterRISCV::X24 as u8), + ("x25", RegisterRISCV::X25 as u8), + ("x26", RegisterRISCV::X26 as u8), + ("x27", RegisterRISCV::X27 as u8), + ("x28", RegisterRISCV::X28 as u8), + ("x29", RegisterRISCV::X29 as u8), + ("x30", RegisterRISCV::X30 as u8), + ("x31", RegisterRISCV::X31 as u8), +]; + +static ARM_REGISTERS: &[(&str, u8)] = &[ + ("pc", RegisterARM::PC as u8), + ("r0", RegisterARM::R0 as u8), + ("r1", RegisterARM::R1 as u8), + ("r2", RegisterARM::R2 as u8), + ("r3", RegisterARM::R3 as u8), + ("r4", RegisterARM::R4 as u8), + ("r5", RegisterARM::R5 as u8), + ("r6", RegisterARM::R6 as u8), + ("r7", RegisterARM::R7 as u8), + ("r8", 
RegisterARM::R8 as u8), + ("r9", RegisterARM::R9 as u8), + ("r10", RegisterARM::R10 as u8), + ("r11", RegisterARM::R11 as u8), + ("r12", RegisterARM::R12 as u8), + ("r13", RegisterARM::R13 as u8), + ("r14", RegisterARM::R14 as u8), + ("r15", RegisterARM::R15 as u8), + ("xpsr", RegisterARM::XPSR as u8), +]; + +#[derive(Clone, Copy)] +pub enum Architecture { + Arm, + Riscv, +} + +pub trait ArchitectureDependentOperations { + fn initialize_unicorn(&self) -> Unicorn<()>; + fn initialize_registers( + &self, + uc: &mut Unicorn<()>, + registerdump: &PyDict, + start_address: &mut u64, + ); + fn dump_registers( + &self, + uc: &mut Unicorn<()>, + registerlist: RwLockWriteGuard>>, + tbcounter: u64, + ); +} + +#[derive(Clone)] +pub struct ArchitectureDependentOperator { + pub architecture: Architecture, +} + +impl ArchitectureDependentOperations for ArchitectureDependentOperator { + fn initialize_unicorn(self: &ArchitectureDependentOperator) -> Unicorn<()> { + match self.architecture { + Architecture::Arm => { + Unicorn::new(Arch::ARM, Mode::THUMB).expect("failed to initialize Unicorn instance") + } + Architecture::Riscv => Unicorn::new(Arch::RISCV, Mode::RISCV64) + .expect("failed to initialize Unicorn instance"), + } + } + + fn initialize_registers( + self: &ArchitectureDependentOperator, + uc: &mut Unicorn<()>, + registerdump: &PyDict, + start_address: &mut u64, + ) { + let registers; + match self.architecture { + Architecture::Arm => { + registers = ARM_REGISTERS; + let xpsr_value: u64 = registerdump.get_item("xpsr").unwrap().extract().unwrap(); + *start_address |= (xpsr_value >> 24) & 1; // Activate thumb mode by setting least + // significant bit of pc if T-bit is set + // in xpsr register + } + Architecture::Riscv => registers = RISCV_REGISTERS, + } + for (name, reg) in registers { + uc.reg_write( + *reg, + registerdump.get_item(*name).unwrap().extract().unwrap(), + ) + .unwrap(); + } + } + + fn dump_registers( + self: &ArchitectureDependentOperator, + uc: &mut 
Unicorn<()>, + mut registerlist: RwLockWriteGuard>>, + tbcounter: u64, + ) { + let mut dump = HashMap::new(); + let registers = match self.architecture { + Architecture::Arm => ARM_REGISTERS, + Architecture::Riscv => RISCV_REGISTERS, + }; + for (name, reg) in registers { + dump.insert(name.to_string(), uc.reg_read(*reg).unwrap()); + } + dump.insert("tbcounter".to_string(), tbcounter); + registerlist.push(dump); + } +} diff --git a/emulation_worker/src/arm.rs b/emulation_worker/src/arm.rs deleted file mode 100644 index 3a0c41e..0000000 --- a/emulation_worker/src/arm.rs +++ /dev/null @@ -1,85 +0,0 @@ -use pyo3::types::PyDict; -use unicorn_engine::{RegisterARM, Unicorn}; - -pub fn initialize_arm_registers(emu: &mut Unicorn<()>, registerdump: &PyDict) { - emu.reg_write( - RegisterARM::R0, - registerdump.get_item("r0").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R1, - registerdump.get_item("r1").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R2, - registerdump.get_item("r2").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R3, - registerdump.get_item("r3").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R4, - registerdump.get_item("r4").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R5, - registerdump.get_item("r5").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R6, - registerdump.get_item("r6").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R7, - registerdump.get_item("r7").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R8, - registerdump.get_item("r8").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R9, - registerdump.get_item("r9").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R10, - registerdump.get_item("r10").unwrap().extract().unwrap(), - ) - .unwrap(); - 
emu.reg_write( - RegisterARM::R11, - registerdump.get_item("r11").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R12, - registerdump.get_item("r12").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R13, - registerdump.get_item("r13").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R14, - registerdump.get_item("r14").unwrap().extract().unwrap(), - ) - .unwrap(); - emu.reg_write( - RegisterARM::R15, - registerdump.get_item("r15").unwrap().extract().unwrap(), - ) - .unwrap(); -} diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index c23d24f..2bd0ace 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -1,19 +1,19 @@ use num::{BigUint, ToPrimitive}; use pyo3::types::{PyDict, PyList}; -use std::io; -use std::sync::Arc; +use std::{collections::HashMap, io, sync::Arc}; use unicorn_engine::unicorn_const::{HookType, MemType}; use unicorn_engine::Unicorn; -use crate::{Fault, FaultModel, FaultType, MemInfo, State}; +use crate::{ArchitectureDependentOperations, Fault, FaultType, MemInfo, State}; mod util; -use util::{apply_model, log_tb_exec, log_tb_info, undo_faults}; +use util::{apply_model, calculate_fault_size, dump_memory, log_tb_exec, log_tb_info, undo_faults}; fn block_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { // Save current tbid for meminfo logs let mut last_tbid = state.last_tbid.write().unwrap(); *last_tbid = address; + *state.tbcounter.write().unwrap() += 1; let live_faults = state.live_faults.read().unwrap(); let mut single_step_hook_handle = state.single_step_hook_handle.write().unwrap(); @@ -64,6 +64,7 @@ fn end_hook_cb( size: u32, state: &Arc, first_endpoint: u64, + memorydump: &Vec>, ) { let mut endpoints = state.endpoints.write().unwrap(); @@ -86,6 +87,15 @@ fn end_hook_cb( single_step_hook_cb(uc, address, size, state); } + for dump_info in memorydump { + dump_memory( + uc, + 
*(*dump_info).get("address").unwrap(), + *(*dump_info).get("length").unwrap() as u32, + state.logs.memdumps.write().unwrap(), + ); + } + uc.emu_stop() .expect("failed terminating the emulation engine"); } @@ -93,12 +103,28 @@ fn end_hook_cb( fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { let mut instruction_count = state.instruction_count.write().unwrap(); - undo_faults( + let undone_fault = undo_faults( uc, *instruction_count, state.faults.read().unwrap(), state.live_faults.write().unwrap(), ); + + if let Some(fault) = undone_fault { + if matches!(fault.kind, FaultType::Register) { + dump_memory( + uc, + address, + calculate_fault_size(&fault), + state.logs.memdumps.write().unwrap(), + ); + } + state.arch_operator.dump_registers( + uc, + state.logs.registerlist.write().unwrap(), + *state.tbcounter.read().unwrap(), + ); + } *instruction_count += 1; let tbinfo = state.logs.tbinfo.write().unwrap(); @@ -157,11 +183,7 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc match fault.kind { FaultType::Data | FaultType::Instruction => { - let fault_size = if matches!(fault.model, FaultModel::Overwrite) { - fault.num_bytes - } else { - 1 - }; + let fault_size = calculate_fault_size(fault); let data = BigUint::from_bytes_le( uc.mem_read_as_vec(fault.address, fault_size as usize) .unwrap() @@ -173,11 +195,22 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc data, apply_model(&data, fault) ); - uc.mem_write( + dump_memory( + uc, fault.address, - apply_model(&data, fault).to_bytes_le().as_slice(), - ) - .expect("failed writing fault data to memory"); + fault_size, + state.logs.memdumps.write().unwrap(), + ); + let mut fault_data = apply_model(&data, fault).to_bytes_le(); + fault_data.extend(std::iter::repeat(0).take(fault_size as usize - fault_data.len())); + uc.mem_write(fault.address, fault_data.as_slice()) + .expect("failed writing fault data to memory"); + 
dump_memory( + uc, + fault.address, + fault_size, + state.logs.memdumps.write().unwrap(), + ); } FaultType::Register => { let register_value = BigUint::from( @@ -199,6 +232,12 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc u64::MAX - fault.lifespan as u64 + instruction_count, ); } + + state.arch_operator.dump_registers( + uc, + state.logs.registerlist.write().unwrap(), + *state.tbcounter.read().unwrap(), + ); } fn initialize_mem_hook(emu: &mut Unicorn<()>, state_arc: &Arc) -> io::Result<()> { @@ -232,11 +271,12 @@ fn initialize_block_hook(emu: &mut Unicorn<()>, state_arc: &Arc) -> io::R Ok(()) } -fn initialize_end_hook( - emu: &mut Unicorn<()>, +fn initialize_end_hook<'a, 'b: 'a>( + emu: &mut Unicorn<'a, ()>, state_arc: &Arc, config: &PyDict, first_endpoint: u64, + memorydump: &'b Vec>, ) -> io::Result<()> { let config_endpoints: &PyList = config.get_item("end").unwrap().extract()?; for obj in config_endpoints { @@ -253,7 +293,7 @@ fn initialize_end_hook( let state_arc = Arc::clone(&state_arc); let end_hook_closure = move |uc: &mut Unicorn<'_, ()>, address: u64, size: u32| { - end_hook_cb(uc, address, size, &state_arc, first_endpoint); + end_hook_cb(uc, address, size, &state_arc, first_endpoint, memorydump); }; emu.add_code_hook(address, address, end_hook_closure) .expect("failed to add end hook"); @@ -265,13 +305,13 @@ fn initialize_end_hook( fn initialize_fault_hook( emu: &mut Unicorn<()>, state_arc: &Arc, - faults: Vec, + faults: &Vec, ) -> io::Result<()> { for fault in faults { let state_arc = Arc::clone(state_arc); let mut state_faults = state_arc.faults.write().unwrap(); - state_faults.insert(fault.trigger.address, fault); + state_faults.insert(fault.trigger.address, *fault); drop(state_faults); let state_arc = Arc::clone(&state_arc); @@ -290,15 +330,22 @@ fn initialize_fault_hook( Ok(()) } -pub fn initialize_hooks( - emu: &mut Unicorn<()>, +pub fn initialize_hooks<'a, 'b: 'a>( + emu: &mut Unicorn<'a, ()>, state_arc: &Arc, 
- faults: Vec, + faults: &Vec, + memorydump: &'b Vec>, config: &PyDict, ) -> io::Result<()> { initialize_mem_hook(emu, state_arc)?; initialize_block_hook(emu, state_arc)?; - initialize_end_hook(emu, state_arc, config, faults[0].trigger.address)?; + initialize_end_hook( + emu, + state_arc, + config, + faults[0].trigger.address, + memorydump, + )?; initialize_fault_hook(emu, state_arc, faults)?; Ok(()) diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index 8c6d4c5..c2b675a 100644 --- a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -1,5 +1,5 @@ -use crate::logs::{TbExecEntry, TbInfoBlock}; -use crate::{Fault, FaultModel, FaultType}; +use crate::structs::{TbExecEntry, TbInfoBlock}; +use crate::{Fault, FaultModel, FaultType, MemDump}; use num::{BigUint, ToPrimitive}; use priority_queue::PriorityQueue; use std::collections::HashMap; @@ -63,21 +63,22 @@ pub fn apply_model(data: &BigUint, fault: &Fault) -> BigUint { } } +// Undo active faults. 
Returns fault that has been undone pub fn undo_faults( uc: &mut Unicorn<'_, ()>, instruction_count: u64, faults: RwLockReadGuard>, mut live_faults: RwLockWriteGuard>, -) { +) -> Option { if live_faults.len() == 0 { - return; + return None; } let ((_, _), priority) = live_faults.peek().unwrap(); let lifespan = u64::MAX - priority; if lifespan > instruction_count { - return; + return None; } let ((address, prefault_data), _) = live_faults.pop().unwrap(); @@ -95,4 +96,34 @@ pub fn undo_faults( .expect("failed restoring memory value"); } } + + Some(*fault) +} + +pub fn dump_memory( + uc: &mut Unicorn<'_, ()>, + address: u64, + size: u32, + mut memdumps: RwLockWriteGuard>, +) { + let dump = uc.mem_read_as_vec(address, size as usize).unwrap(); + + if let Some(mem_dump) = memdumps.get_mut(&address) { + mem_dump.dumps.push(dump); + } else { + let mem_dump = MemDump { + address, + len: size, + dumps: Vec::from([dump]), + }; + memdumps.insert(address, mem_dump); + } +} + +pub fn calculate_fault_size(fault: &Fault) -> u32 { + if matches!(fault.model, FaultModel::Overwrite) { + return fault.num_bytes; + } + + ((fault.mask as f64).log2() / 8_f64).floor() as u32 + 1 } diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index 46627fe..fe78673 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -7,18 +7,15 @@ use pyo3::{ use std::collections::HashMap; use std::sync::{Arc, RwLock}; -use unicorn_engine::Unicorn; -use unicorn_engine::unicorn_const::{ - Arch, - Mode, - Permission, -}; +use unicorn_engine::unicorn_const::Permission; -mod arm; -use crate::arm::initialize_arm_registers; +mod architecture; +use crate::architecture::{ + Architecture, ArchitectureDependentOperations, ArchitectureDependentOperator, +}; -mod logs; -use crate::logs::{Fault, FaultModel, FaultType, Logs, MemInfo, State}; +mod structs; +use crate::structs::{Fault, FaultModel, FaultType, Logs, MemDump, MemInfo, State}; mod hooks; use crate::hooks::initialize_hooks; @@ 
-29,10 +26,60 @@ fn run_unicorn( faults: Vec, config: &PyDict, ) -> PyResult { - println!("{config:?}"); - let mut unicorn = - Unicorn::new(Arch::ARM, Mode::THUMB).expect("failed to initialize Unicorn instance"); - let emu = &mut unicorn; + let arch_str = pregoldenrun_data + .get_item("architecture") + .unwrap() + .extract()?; + let arch: Architecture = match arch_str { + "arm" => Architecture::Arm, + "riscv64" => Architecture::Riscv, + _ => panic!("Unsupported architecture"), + }; + + let memorydump: Vec> = config + .get_item("memorydump") + .map_or_else(Vec::new, |obj| obj.extract().unwrap()); + + let arch_operator = ArchitectureDependentOperator { architecture: arch }; + let emu = &mut arch_operator.initialize_unicorn(); + + let registerdumps: &PyList = pregoldenrun_data + .get_item(String::from(arch_str) + "registers") + .unwrap() + .extract()?; + let start: HashMap = config.get_item("start").unwrap().extract()?; + let mut start_address = *start.get("address").unwrap(); + arch_operator.initialize_registers( + emu, + registerdumps.get_item(0).unwrap().extract()?, + &mut start_address, + ); + + let memmaplist: &PyList = pregoldenrun_data + .get_item("memmaplist") + .unwrap() + .extract()?; + + for obj in memmaplist.iter() { + let memmap: &PyDict = obj.extract()?; + let address: u64 = memmap.get_item("address").unwrap().extract()?; + let length: usize = memmap.get_item("length").unwrap().extract()?; + //println!("Mapping memory at {:x} size {:x}", address & (u64::MAX ^0xfff), cmp::max(length, 0x1000)); + match emu.mem_map( + address & (u64::MAX ^ 0xfff), + usize::max(length, 0x1000), + Permission::ALL, + ) { + Ok(()) => {} + Err(unicorn_engine::unicorn_const::uc_error::MAP) => { + println!("Memory space is already mapped. Ignoring...") + } + Err(unicorn_engine::unicorn_const::uc_error::NOMEM) => { + println!("Memory space too big, cannot allocate. 
Ignoring...") + } + Err(err) => panic!("failed mapping memory: {err:?}"), + } + } let memdumplist: &PyList = pregoldenrun_data .get_item("memdumplist") @@ -41,34 +88,25 @@ fn run_unicorn( for obj in memdumplist.iter() { let memdump: &PyDict = obj.extract()?; let address: u64 = memdump.get_item("address").unwrap().extract()?; - let length: usize = memdump.get_item("len").unwrap().extract()?; let dumps: &PyList = memdump.get_item("dumps").unwrap().extract()?; let dump: Vec = dumps.get_item(0).unwrap().extract()?; - // TODO: Use correct permissions - emu.mem_map(address, length, Permission::ALL) - .expect("failed to map code page"); emu.mem_write(address, dump.as_slice()) .expect("failed to write instructions"); } - let armregisters: &PyList = pregoldenrun_data - .get_item("armregisters") - .unwrap() - .extract()?; - let registerdump: &PyDict = armregisters.get_item(0).unwrap().extract()?; - - initialize_arm_registers(emu, registerdump); - let logs = Logs { meminfo: RwLock::new(HashMap::new()), endpoint: RwLock::new((false, 0, 0)), tbinfo: RwLock::new(HashMap::new()), tbexec: RwLock::new(Vec::new()), + registerlist: RwLock::new(Vec::new()), + memdumps: RwLock::new(HashMap::new()), }; let state = State { last_tbid: RwLock::new(0), + tbcounter: RwLock::new(0), endpoints: RwLock::new(HashMap::new()), faults: RwLock::new(HashMap::new()), live_faults: RwLock::new(PriorityQueue::new()), @@ -79,24 +117,21 @@ fn run_unicorn( .mode(capstone::arch::arm::ArchMode::Thumb) .build() .unwrap(), + arch_operator: arch_operator.clone(), logs, }; let state_arc: Arc = Arc::new(state); - initialize_hooks(emu, &state_arc, faults, config).expect("failed initializing hooks"); + initialize_hooks(emu, &state_arc, &faults, &memorydump, config) + .expect("failed initializing hooks"); let max_instruction_count: usize = config .get_item("max_instruction_count") .unwrap() .extract()?; - let start: HashMap = config.get_item("start").unwrap().extract()?; - emu.emu_start( - 
*start.get("address").unwrap() + 1, - 0, - 0, - max_instruction_count, - ) - .unwrap_or_else(|_| panic!("failed to emulate code. PC: {}", emu.pc_read().unwrap())); + + emu.emu_start(start_address, 0, 0, max_instruction_count) + .unwrap_or_else(|_| println!("failed to emulate code at 0x{:x}", emu.pc_read().unwrap())); Python::with_gil(|py| Ok(state_arc.logs.to_object(py))) } diff --git a/emulation_worker/src/logs.rs b/emulation_worker/src/structs.rs similarity index 82% rename from emulation_worker/src/logs.rs rename to emulation_worker/src/structs.rs index a629a22..adbcfda 100644 --- a/emulation_worker/src/logs.rs +++ b/emulation_worker/src/structs.rs @@ -1,3 +1,4 @@ +use crate::ArchitectureDependentOperator; use capstone::Capstone; use num::BigUint; use priority_queue::PriorityQueue; @@ -124,11 +125,31 @@ impl ToPyObject for TbExecEntry { } } +pub struct MemDump { + pub address: u64, + pub len: u32, + pub dumps: Vec>, +} + +impl ToPyObject for MemDump { + fn to_object(&self, py: Python<'_>) -> PyObject { + let dict = PyDict::new(py); + dict.set_item("address", self.address).unwrap(); + dict.set_item("len", self.len).unwrap(); + dict.set_item("numdumps", self.dumps.len()).unwrap(); + dict.set_item("dumps", self.dumps.to_object(py)).unwrap(); + + dict.to_object(py) + } +} + pub struct Logs { pub meminfo: RwLock>, pub endpoint: RwLock<(bool, u64, u32)>, pub tbinfo: RwLock>, pub tbexec: RwLock>, + pub registerlist: RwLock>>, + pub memdumps: RwLock>, } impl ToPyObject for Logs { @@ -159,7 +180,14 @@ impl ToPyObject for Logs { } dict.set_item("endpoint", if endpoint.0 { 1 } else { 0 }) .unwrap(); - drop(meminfo); + + let registerlist = self.registerlist.read().unwrap(); + dict.set_item("registerlist", registerlist.to_object(py)) + .unwrap(); + + let memdumps = self.memdumps.read().unwrap(); + let memdump_list = PyList::new(py, memdumps.values()); + dict.set_item("memdumplist", memdump_list).unwrap(); dict.to_object(py) } @@ -167,12 +195,14 @@ impl ToPyObject for Logs { 
pub struct State { pub last_tbid: RwLock, + pub tbcounter: RwLock, pub endpoints: RwLock>, pub faults: RwLock>, pub live_faults: RwLock>, pub instruction_count: RwLock, pub single_step_hook_handle: RwLock>, pub cs_engine: Capstone, + pub arch_operator: ArchitectureDependentOperator, pub logs: Logs, } diff --git a/faultclass.py b/faultclass.py index 02ff640..fa6e3cb 100644 --- a/faultclass.py +++ b/faultclass.py @@ -807,8 +807,13 @@ def python_worker_unicorn( logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu) logger.info(f"Ended qemu for exp {index}! Took {time.time() - t0}") - logs["index"] = index - logs["faultlist"] = fault_list + output = {} + + output["index"] = index + output["faultlist"] = fault_list + output["endpoint"] = logs["endpoint"] + output["end_reason"] = logs["end_reason"] + output["memdumplist"] = logs["memdumplist"] pdtbexeclist = pd.DataFrame(logs["tbexec"]) [pdtbexeclist, tblist] = filter_tb( @@ -818,10 +823,12 @@ def python_worker_unicorn( goldenrun_data["tbinfo"], index, ) - logs["tbexec"] = write_output_wrt_goldenrun("tbexec", pdtbexeclist, goldenrun_data) - logs["tbinfo"] = write_output_wrt_goldenrun("tbinfo", tblist, goldenrun_data) + output["tbexec"] = write_output_wrt_goldenrun("tbexec", pdtbexeclist, goldenrun_data) + output["tbinfo"] = write_output_wrt_goldenrun("tbinfo", tblist, goldenrun_data) + + output["armregisters"] = write_output_wrt_goldenrun("armregisters", pd.DataFrame(logs["registerlist"], dtype="UInt64"), goldenrun_data) - queue_output.put(logs) + queue_output.put(output) logger.info( "Python worker for experiment {} done. 
Took {}s".format( From 70a6a7d0b1f85ca2ca8aae7cfb3ab6349db48510 Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Tue, 28 Feb 2023 13:56:23 +0100 Subject: [PATCH 11/13] dump pregoldenrun memory map; handle registerdumps --- .github/workflows/build.yml | 2 +- README.md | 1 + controller.py | 28 +++++++++++++++++++++++++--- faultclass.py | 36 ++++++++++++++++++++++++++++++------ faultplugin/memmapdump.c | 6 ++++-- goldenrun.py | 7 ++++++- requirements.txt | 1 + 7 files changed, 68 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3e4ca7..a8aa131 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 - name: install packages - run: sudo apt update; sudo apt upgrade -y; sudo apt install -y build-essential ninja-build libglib2.0-dev libfdt-dev libpixman-1-dev zlib1g-dev python3-tables python3-pandas python3-prctl python3-json5 + run: sudo apt update; sudo apt upgrade -y; sudo apt install -y build-essential ninja-build libglib2.0-dev libfdt-dev libpixman-1-dev zlib1g-dev python3-tables python3-pandas python3-prctl python3-json5 python3-pyelftools - name: Install latest stable Rust toolchain uses: actions-rs/toolchain@v1 diff --git a/README.md b/README.md index e5b7924..f086d67 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ tables (tested 3.6.1) python-prctl (tested 1.6.1) numpy (tested 1.17.4) json (tested 2.0.9), or json5 (tested 0.9.6) +pyelftools (tested 0.29) ``` These python3 libraries can either be installed using your linux-distribution's installation method or by using pip3. JSON5 is strongly recommended as it allows integers to be represented as hexadecimal numbers. 
diff --git a/controller.py b/controller.py index eba46a5..ab35585 100755 --- a/controller.py +++ b/controller.py @@ -11,6 +11,8 @@ import subprocess import time +from elftools.elf.elffile import ELFFile + try: import json5 as json @@ -251,7 +253,7 @@ def controller( qemu_pre=None, qemu_post=None, logger_postprocess=None, - unicorn_emulation=False + unicorn_emulation=False, ): """ This function builds the unrolled fault structure, performs golden run and @@ -284,7 +286,12 @@ def controller( config_qemu, qemu_output, queue_output, faultlist, qemu_pre, qemu_post ) pickle.dump( - (config_qemu["max_instruction_count"], pregoldenrun_data, goldenrun_data, faultlist), + ( + config_qemu["max_instruction_count"], + pregoldenrun_data, + goldenrun_data, + faultlist, + ), lzma.open("bkup_goldenrun_results.xz", "wb"), ) else: @@ -295,6 +302,21 @@ def controller( faultlist, ) = pickle.load(lzma.open("bkup_goldenrun_results.xz", "rb")) + if unicorn_emulation: + elffile = ELFFile(open(config_qemu["kernel"], "rb")) + for segment in elffile.iter_segments(): + if segment["p_type"] == "PT_LOAD": + segment_data = segment.data() + pregoldenrun_data["memdumplist"].append( + { + "address": segment["p_vaddr"], + "len": len(segment_data), + "numpdumps": 1, + "dumps": [list(segment_data)], + } + ) + break + p_logger = Process( target=logger, args=( @@ -657,5 +679,5 @@ def process_arguments(args): None, # qemu_pre None, # qemu_post None, # logger_postprocess - parguments["unicorn_emulation"], # enable unicorn emulation + parguments["unicorn_emulation"], # enable unicorn emulation ) diff --git a/faultclass.py b/faultclass.py index fa6e3cb..116aaca 100644 --- a/faultclass.py +++ b/faultclass.py @@ -405,6 +405,14 @@ def readout_tb_faulted(line): return tbfaulted +def readout_memmap(line): + split = line.split("|") + memmap = {} + memmap["address"] = int(split[0], 16) + memmap["length"] = int(split[1], 16) + return memmap + + def readout_data( pipe, index, @@ -430,10 +438,12 @@ def readout_data( 
memdumptmp = [] registerlist = [] tbfaultedlist = [] + memmaplist = [] tbinfo = 0 tbexec = 0 meminfo = 0 memdump = 0 + memmap = 0 endpoint = 0 end_reason = "" max_ram_usage = 0 @@ -476,6 +486,10 @@ def readout_data( split = line.split("]:") architecture = split[1].strip() + elif "[Memory Map]" in line: + state = "memmap" + memmap = 1 + elif "[END]" in line: state = "none" logger.info( @@ -538,6 +552,9 @@ def readout_data( output["end_reason"] = end_reason output["architecture"] = architecture + if memmap == 1: + output["memmaplist"] = memmaplist + if memdump == 1: output["memdumplist"] = memdumplist @@ -557,7 +574,7 @@ def readout_data( regtype = "arm" elif "[RiscV Registers]" in line: state = "riscvregisters" - regtype = "riscv" + regtype = "riscv64" elif "[TB Faulted]" in line: state = "tbfaulted" tbfaulted = 1 @@ -594,6 +611,8 @@ def readout_data( registerlist.append(readout_riscv_registers(line)) elif "tbfaulted" in state: tbfaultedlist.append(readout_tb_faulted(line)) + elif "memmap" in state: + memmaplist.append(readout_memmap(line)) else: logger.warning("In exp {} unknown state {}".format(index, line)) return max_ram_usage @@ -650,8 +669,10 @@ def configure_qemu(control, config_qemu, num_faults, memorydump_list, index): out = out + "$$ num_faults: {}\n".format(num_faults) if index is -2: + out = out + "$$enable_memmap_dump\n" out = out + "$$enable_full_mem_dump\n" else: + out = out + "$$disable_memmap_dump\n" out = out + "$$disable_full_mem_dump\n" if "tb_exec_list" in config_qemu: @@ -823,17 +844,20 @@ def python_worker_unicorn( goldenrun_data["tbinfo"], index, ) - output["tbexec"] = write_output_wrt_goldenrun("tbexec", pdtbexeclist, goldenrun_data) + output["tbexec"] = write_output_wrt_goldenrun( + "tbexec", pdtbexeclist, goldenrun_data + ) output["tbinfo"] = write_output_wrt_goldenrun("tbinfo", tblist, goldenrun_data) - output["armregisters"] = write_output_wrt_goldenrun("armregisters", pd.DataFrame(logs["registerlist"], dtype="UInt64"), goldenrun_data) + 
regtype = pregoldenrun_data["architecture"] + output[f"{regtype}registers"] = pd.DataFrame( + logs["registerlist"], dtype="UInt64" + ).to_dict("records") queue_output.put(output) logger.info( - "Python worker for experiment {} done. Took {}s".format( - index, time.time() - t0 - ) + "Python worker for experiment {} done. Took {}s".format(index, time.time() - t0) ) return diff --git a/faultplugin/memmapdump.c b/faultplugin/memmapdump.c index 6648b61..0ec6602 100644 --- a/faultplugin/memmapdump.c +++ b/faultplugin/memmapdump.c @@ -36,8 +36,10 @@ struct FlatRange { bool dump_memmap_information(Int128 start, Int128 len, const MemoryRegion *mr, hwaddr offset_in_region, void *opaque) { g_autoptr(GString) out = g_string_new(""); - g_string_printf(out, "$$ 0x%lx | 0x%lx \n", int128_get64(start), int128_get64(len)); - plugin_write_to_data_pipe(out->str, out->len); + if (mr->ram) { + g_string_printf(out, "$$ 0x%lx | 0x%lx \n", int128_get64(start), int128_get64(len)); + plugin_write_to_data_pipe(out->str, out->len); + } return false; } diff --git a/goldenrun.py b/goldenrun.py index f6df31f..2ad6fe2 100644 --- a/goldenrun.py +++ b/goldenrun.py @@ -99,7 +99,12 @@ def run_goldenrun( ) ) - return [config_qemu["max_instruction_count"], experiments[0]["data"], experiment["data"], faultconfig] + return [ + config_qemu["max_instruction_count"], + experiments[0]["data"], + experiment["data"], + faultconfig, + ] def find_insn_addresses_in_tb(insn_address, data): diff --git a/requirements.txt b/requirements.txt index 8736a4a..316e21b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ pandas~=1.5 python-prctl==1.8.1 tables==3.7.0 json5==0.9.10 +pyelftools==0.29 From 1dcf389bb2ceb3c71b41e0442e5d0a057dc4d0cb Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Fri, 3 Mar 2023 14:53:55 +0100 Subject: [PATCH 12/13] add logging framework --- controller.py | 9 ++- emulation_worker/Cargo.lock | 113 +++++++++++++++++++++++++++ emulation_worker/Cargo.toml | 3 + 
emulation_worker/src/architecture.rs | 20 +++++ emulation_worker/src/hooks.rs | 24 +++--- emulation_worker/src/hooks/util.rs | 3 +- emulation_worker/src/lib.rs | 63 ++++++++++++--- emulation_worker/src/structs.rs | 2 +- faultclass.py | 10 ++- 9 files changed, 217 insertions(+), 30 deletions(-) diff --git a/controller.py b/controller.py index ab35585..8effdc0 100755 --- a/controller.py +++ b/controller.py @@ -247,7 +247,7 @@ def controller( num_workers, queuedepth, compressionlevel, - qemu_output, + engine_output, goldenrun=True, logger=hdf5collector, qemu_pre=None, @@ -283,7 +283,7 @@ def controller( goldenrun_data, faultlist, ] = run_goldenrun( - config_qemu, qemu_output, queue_output, faultlist, qemu_pre, qemu_post + config_qemu, engine_output, queue_output, faultlist, qemu_pre, qemu_post ) pickle.dump( ( @@ -376,6 +376,7 @@ def controller( config_qemu, faults["index"], queue_output, + engine_output, pregoldenrun_data, goldenrun_data, True, @@ -390,7 +391,7 @@ def controller( config_qemu, faults["index"], queue_output, - qemu_output, + engine_output, goldenrun_data, True, queue_ram_usage, @@ -673,7 +674,7 @@ def process_arguments(args): parguments["num_workers"], # num_workers parguments["queuedepth"], # queuedepth parguments["compressionlevel"], # compressionlevel - args.debug, # qemu_output + args.debug, # engine_output parguments["goldenrun"], # goldenrun hdf5collector, # logger None, # qemu_pre diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index 15d0e00..8eaec9d 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -60,9 +60,12 @@ name = "emulation_worker" version = "0.1.0" dependencies = [ "capstone", + "log", "num", "priority-queue", "pyo3", + "simplelog", + "time", "unicorn-engine", ] @@ -88,6 +91,12 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780" +[[package]] +name = "itoa" +version = "1.0.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + [[package]] name = "libc" version = "0.2.139" @@ -104,6 +113,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -189,6 +207,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.17.0" @@ -327,6 +354,23 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" + +[[package]] +name = "simplelog" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acee08041c5de3d5048c8b3f6f13fafb3026b24ba43c6a695a0c76179b844369" +dependencies = [ + "log", + "termcolor", + "time", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -350,6 +394,44 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d" +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "time" +version = 
"0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +dependencies = [ + "time-core", +] + [[package]] name = "unicode-ident" version = "1.0.6" @@ -375,6 +457,37 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.42.0" diff --git a/emulation_worker/Cargo.toml 
b/emulation_worker/Cargo.toml index 4935506..8b89aaf 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -14,3 +14,6 @@ unicorn-engine = "2.0.0" num = "0.4.0" priority-queue = "1.3.1" capstone = "0.11.0" +log = "0.4.17" +simplelog = "0.12.1" +time = "0.3.20" diff --git a/emulation_worker/src/architecture.rs b/emulation_worker/src/architecture.rs index 93f01da..dbb4565 100644 --- a/emulation_worker/src/architecture.rs +++ b/emulation_worker/src/architecture.rs @@ -1,3 +1,4 @@ +use capstone::{prelude::BuildsCapstone, prelude::BuildsCapstoneExtraMode, Capstone}; use pyo3::types::PyDict; use std::collections::HashMap; use std::sync::RwLockWriteGuard; @@ -71,6 +72,7 @@ pub enum Architecture { pub trait ArchitectureDependentOperations { fn initialize_unicorn(&self) -> Unicorn<()>; + fn initialize_cs_engine(&self) -> Capstone; fn initialize_registers( &self, uc: &mut Unicorn<()>, @@ -127,6 +129,24 @@ impl ArchitectureDependentOperations for ArchitectureDependentOperator { } } + fn initialize_cs_engine(self: &ArchitectureDependentOperator) -> Capstone { + match self.architecture { + Architecture::Arm => Capstone::new() + .arm() + .mode(capstone::arch::arm::ArchMode::Thumb) + .build() + .unwrap(), + Architecture::Riscv => Capstone::new() + .riscv() + .mode(capstone::arch::riscv::ArchMode::RiscV64) + .extra_mode(std::iter::once( + capstone::arch::riscv::ArchExtraMode::RiscVC, + )) + .build() + .unwrap(), + } + } + fn dump_registers( self: &ArchitectureDependentOperator, uc: &mut Unicorn<()>, diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index 2bd0ace..f5cf28c 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -1,3 +1,4 @@ +use log::{debug, info}; use num::{BigUint, ToPrimitive}; use pyo3::types::{PyDict, PyList}; use std::{collections::HashMap, io, sync::Arc}; @@ -70,15 +71,15 @@ fn end_hook_cb( let counter = endpoints.get_mut(&address).unwrap(); if *counter > 1 { - println!( - "Decreasing 
endpoint counter for {:?} to {:?}", + debug!( + "Decreasing endpoint counter for 0x{:x} to {:?}", address, *counter ); *counter -= 1; } else { let mut endpoint = state.logs.endpoint.write().unwrap(); *endpoint = (address == first_endpoint, address, 1); - println!("Reached endpoint at {address:?}"); + info!("Reached endpoint at 0x{address:x}"); // Since this hook has been registered before the single step hook we need to call it // manullay to log the last instruction, since the callback would not be called otherwise @@ -102,6 +103,11 @@ fn end_hook_cb( } fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: &Arc) { + debug!( + "Single step\taddr 0x{:x}\tinstructions {:?}", + address, + uc.mem_read_as_vec(address, size as usize).unwrap() + ); let mut instruction_count = state.instruction_count.write().unwrap(); let undone_fault = undo_faults( uc, @@ -111,7 +117,7 @@ fn single_step_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, size: u32, state: ); if let Some(fault) = undone_fault { - if matches!(fault.kind, FaultType::Register) { + if !matches!(fault.kind, FaultType::Register) { dump_memory( uc, address, @@ -152,10 +158,10 @@ fn mem_hook_cb( meminfo.insert( (address, pc), MemInfo { - ins: address, + ins: pc, counter: 1, direction: if mem_type == MemType::READ { 0 } else { 1 }, - address: pc, + address, tbid: last_tbid, size, }, @@ -177,7 +183,7 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc return; } - println!("Executing fault at {address:?}"); + info!("Executing fault at 0x{address:x}"); let prefault_data; @@ -190,8 +196,8 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, address: u64, _size: u32, state: &Arc .as_slice(), ); prefault_data = data.clone(); - println!( - "Overwriting {:?} with {:?}", + debug!( + "Overwriting 0x{:x} with 0x{:x}", data, apply_model(&data, fault) ); diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index c2b675a..d45c253 100644 --- 
a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -1,5 +1,6 @@ use crate::structs::{TbExecEntry, TbInfoBlock}; use crate::{Fault, FaultModel, FaultType, MemDump}; +use log::debug; use num::{BigUint, ToPrimitive}; use priority_queue::PriorityQueue; use std::collections::HashMap; @@ -85,7 +86,7 @@ pub fn undo_faults( let fault = faults.get(&address).unwrap(); - println!("Undoing fault"); + debug!("Undoing fault"); match fault.kind { FaultType::Register => { uc.reg_write(fault.address as i32, prefault_data.to_u64().unwrap()) diff --git a/emulation_worker/src/lib.rs b/emulation_worker/src/lib.rs index fe78673..8622012 100644 --- a/emulation_worker/src/lib.rs +++ b/emulation_worker/src/lib.rs @@ -1,4 +1,4 @@ -use capstone::{prelude::BuildsCapstone, Capstone}; +use log::{debug, error, info, warn, LevelFilter}; use priority_queue::PriorityQueue; use pyo3::{ prelude::*, @@ -6,6 +6,7 @@ use pyo3::{ }; use std::collections::HashMap; use std::sync::{Arc, RwLock}; +use time::macros::format_description; use unicorn_engine::unicorn_const::Permission; @@ -20,12 +21,40 @@ use crate::structs::{Fault, FaultModel, FaultType, Logs, MemDump, MemInfo, State mod hooks; use crate::hooks::initialize_hooks; +fn setup_logging(index: u64, debug: bool) { + let config = simplelog::ConfigBuilder::new() + .set_time_format_custom(format_description!( + "[year]-[month]-[day] [hour]:[minute]:[second],[subsecond digits:3]" + )) + .build(); + let mut loggers: Vec> = Vec::new(); + if debug { + loggers.push(simplelog::WriteLogger::new( + LevelFilter::Debug, + config.clone(), + std::fs::File::create(format!("log_{index:?}.txt")).unwrap(), + )); + } + let log_level = if debug { + LevelFilter::Debug + } else { + LevelFilter::Info + }; + loggers.push(simplelog::SimpleLogger::new(log_level, config)); + + simplelog::CombinedLogger::init(loggers).unwrap(); +} + #[pyfunction] fn run_unicorn( pregoldenrun_data: &PyDict, faults: Vec, config: &PyDict, + index: u64, + engine_output: 
bool, ) -> PyResult { + setup_logging(index, engine_output); + let arch_str = pregoldenrun_data .get_item("architecture") .unwrap() @@ -64,7 +93,11 @@ fn run_unicorn( let memmap: &PyDict = obj.extract()?; let address: u64 = memmap.get_item("address").unwrap().extract()?; let length: usize = memmap.get_item("length").unwrap().extract()?; - //println!("Mapping memory at {:x} size {:x}", address & (u64::MAX ^0xfff), cmp::max(length, 0x1000)); + debug!( + "Mapping memory at 0x{:x} size 0x{:x}", + address & (u64::MAX ^ 0xfff), + usize::max(length, 0x1000) + ); match emu.mem_map( address & (u64::MAX ^ 0xfff), usize::max(length, 0x1000), @@ -72,10 +105,10 @@ fn run_unicorn( ) { Ok(()) => {} Err(unicorn_engine::unicorn_const::uc_error::MAP) => { - println!("Memory space is already mapped. Ignoring...") + warn!("Memory space is already mapped. Ignoring...") } Err(unicorn_engine::unicorn_const::uc_error::NOMEM) => { - println!("Memory space too big, cannot allocate. Ignoring...") + warn!("Memory space too big, cannot allocate. 
Ignoring...") } Err(err) => panic!("failed mapping memory: {err:?}"), } @@ -90,9 +123,10 @@ fn run_unicorn( let address: u64 = memdump.get_item("address").unwrap().extract()?; let dumps: &PyList = memdump.get_item("dumps").unwrap().extract()?; let dump: Vec = dumps.get_item(0).unwrap().extract()?; + debug!("writing {:?} bytes to 0x{:x}", dump.len(), address); emu.mem_write(address, dump.as_slice()) - .expect("failed to write instructions"); + .unwrap_or_else(|_| error!("failed to write dumped data at 0x{:X}", address)); } let logs = Logs { @@ -112,11 +146,7 @@ fn run_unicorn( live_faults: RwLock::new(PriorityQueue::new()), instruction_count: RwLock::new(0), single_step_hook_handle: RwLock::new(None), - cs_engine: Capstone::new() - .arm() - .mode(capstone::arch::arm::ArchMode::Thumb) - .build() - .unwrap(), + cs_engine: arch_operator.initialize_cs_engine(), arch_operator: arch_operator.clone(), logs, }; @@ -130,8 +160,19 @@ fn run_unicorn( .unwrap() .extract()?; + info!("Starting emulation at 0x{:x}", start_address); emu.emu_start(start_address, 0, 0, max_instruction_count) - .unwrap_or_else(|_| println!("failed to emulate code at 0x{:x}", emu.pc_read().unwrap())); + .unwrap_or_else(|_| error!("failed to emulate code at 0x{:x}", emu.pc_read().unwrap())); + + { + let state = Arc::clone(&state_arc); + state.arch_operator.dump_registers( + emu, + state.logs.registerlist.write().unwrap(), + *state.tbcounter.read().unwrap(), + ); + } + info!("Finished emulation"); Python::with_gil(|py| Ok(state_arc.logs.to_object(py))) } diff --git a/emulation_worker/src/structs.rs b/emulation_worker/src/structs.rs index adbcfda..6ad3c22 100644 --- a/emulation_worker/src/structs.rs +++ b/emulation_worker/src/structs.rs @@ -173,7 +173,7 @@ impl ToPyObject for Logs { let endpoint = self.endpoint.read().unwrap(); if endpoint.2 == 1 { - dict.set_item("end_reason", format!("{}/1", endpoint.1)) + dict.set_item("end_reason", format!("endpoint {}/1", endpoint.1)) .unwrap(); } else { 
dict.set_item("end_reason", "max tb").unwrap(); diff --git a/faultclass.py b/faultclass.py index 116aaca..2756782 100644 --- a/faultclass.py +++ b/faultclass.py @@ -726,7 +726,7 @@ def python_worker( config_qemu, index, queue_output, - qemu_output, + engine_output, goldenrun_data=None, change_nice=False, queue_ram_usage=None, @@ -760,7 +760,7 @@ def python_worker( paths["config"], paths["data"], config_qemu, - qemu_output, + engine_output, index, qemu_custom_paths, ), @@ -817,6 +817,7 @@ def python_worker_unicorn( config_qemu, index, queue_output, + engine_output, pregoldenrun_data, goldenrun_data, change_nice=False, @@ -825,8 +826,8 @@ def python_worker_unicorn( if change_nice: os.nice(19) - logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu) - logger.info(f"Ended qemu for exp {index}! Took {time.time() - t0}") + logs = run_unicorn(pregoldenrun_data, fault_list, config_qemu, index, engine_output) + logger.info(f"Ended unicorn for exp {index}! Took {time.time() - t0}") output = {} @@ -835,6 +836,7 @@ def python_worker_unicorn( output["endpoint"] = logs["endpoint"] output["end_reason"] = logs["end_reason"] output["memdumplist"] = logs["memdumplist"] + output["meminfo"] = logs["meminfo"] pdtbexeclist = pd.DataFrame(logs["tbexec"]) [pdtbexeclist, tblist] = filter_tb( From efb110658ee4d6fc0d1988cd5b2d0dd6e661b37d Mon Sep 17 00:00:00 2001 From: Kevin Schneider Date: Tue, 25 Apr 2023 16:10:14 +0200 Subject: [PATCH 13/13] add unicorn dependency as submodule; remove modified tbs from cache during emulation --- .github/workflows/lint.yml | 1 + .gitmodules | 3 +++ emulation_worker/Cargo.lock | 4 +--- emulation_worker/Cargo.toml | 4 +++- emulation_worker/src/hooks.rs | 6 ++++++ emulation_worker/src/hooks/util.rs | 8 ++++++++ emulation_worker/unicorn | 1 + 7 files changed, 23 insertions(+), 4 deletions(-) create mode 160000 emulation_worker/unicorn diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 153a6db..145b5d9 100644 --- 
a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -42,4 +42,5 @@ jobs: args: --all --manifest-path ./emulation_worker/Cargo.toml -- --check - run: | cd emulation_worker + git submodule update --init unicorn cargo clippy -- -D warnings diff --git a/.gitmodules b/.gitmodules index a1a8b17..9b861d2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "qemu"] path = qemu url = https://github.com/Fraunhofer-AISEC/archie-qemu.git +[submodule "emulation_worker/unicorn"] + path = emulation_worker/unicorn + url = https://github.com/unicorn-engine/unicorn.git diff --git a/emulation_worker/Cargo.lock b/emulation_worker/Cargo.lock index 8eaec9d..f52daec 100644 --- a/emulation_worker/Cargo.lock +++ b/emulation_worker/Cargo.lock @@ -440,9 +440,7 @@ checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicorn-engine" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3b881bfd9837ff4f62e81a1e64b40a584604375ae0a73d0d5f09b7a72350b96" +version = "2.0.2" dependencies = [ "bitflags", "cc", diff --git a/emulation_worker/Cargo.toml b/emulation_worker/Cargo.toml index 8b89aaf..96d2a00 100644 --- a/emulation_worker/Cargo.toml +++ b/emulation_worker/Cargo.toml @@ -2,6 +2,8 @@ name = "emulation_worker" version = "0.1.0" edition = "2021" +authors = ["Kevin Schneider"] +license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] @@ -10,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.17.3", features = ["extension-module"] } -unicorn-engine = "2.0.0" +unicorn-engine = { path = "./unicorn" } num = "0.4.0" priority-queue = "1.3.1" capstone = "0.11.0" diff --git a/emulation_worker/src/hooks.rs b/emulation_worker/src/hooks.rs index f5cf28c..7165aef 100644 --- a/emulation_worker/src/hooks.rs +++ b/emulation_worker/src/hooks.rs @@ -211,6 +211,12 @@ fn fault_hook_cb(uc: &mut Unicorn<'_, ()>, 
address: u64, _size: u32, state: &Arc fault_data.extend(std::iter::repeat(0).take(fault_size as usize - fault_data.len())); uc.mem_write(fault.address, fault_data.as_slice()) .expect("failed writing fault data to memory"); + if matches!(fault.kind, FaultType::Instruction) { + // We need to remove the tb containing the modified instructions from the cache + // since they might not have any effect otherwise + uc.ctl_remove_cache(fault.address, fault.address + fault_size as u64) + .unwrap(); + } dump_memory( uc, fault.address, diff --git a/emulation_worker/src/hooks/util.rs b/emulation_worker/src/hooks/util.rs index d45c253..b5b5234 100644 --- a/emulation_worker/src/hooks/util.rs +++ b/emulation_worker/src/hooks/util.rs @@ -95,6 +95,14 @@ pub fn undo_faults( FaultType::Data | FaultType::Instruction => { uc.mem_write(fault.address, prefault_data.to_bytes_le().as_slice()) .expect("failed restoring memory value"); + if matches!(fault.kind, FaultType::Instruction) { + //uc.ctl_arg_2(UC_CTL_TB_REMOVE_CACHE | UC_CTL_IO_WRITE, unsafe { std::mem::transmute::(address) }, unsafe { std::mem::transmute::(fault.address + prefault_data.to_bytes_le().len() as u64) }).unwrap(); + uc.ctl_remove_cache( + fault.address, + fault.address + prefault_data.to_bytes_le().len() as u64, + ) + .unwrap(); + } } } diff --git a/emulation_worker/unicorn b/emulation_worker/unicorn new file mode 160000 index 0000000..bde3cd7 --- /dev/null +++ b/emulation_worker/unicorn @@ -0,0 +1 @@ +Subproject commit bde3cd7dae991caadf38243771f70ab2c6e1a2b2