diff --git a/examples/naive/armv7m/armv7m_simple0_func.s b/examples/naive/armv7m/armv7m_simple0_func.s index a0c3ccf9..9b757e40 100644 --- a/examples/naive/armv7m/armv7m_simple0_func.s +++ b/examples/naive/armv7m/armv7m_simple0_func.s @@ -1,10 +1,10 @@ .syntax unified //.cpu cortex-m4 // llvm-mc does not like this... -//.thumb // unicorn seems to get confused by this... +.thumb // unicorn seems to get confused by this... .align 2 .global my_func -// .type my_func, %function // llvm-mc does not like this... +.type my_func, %function my_func: push {r4-r11, lr} diff --git a/slothy/core/core.py b/slothy/core/core.py index c36d9863..f320d241 100644 --- a/slothy/core/core.py +++ b/slothy/core/core.py @@ -897,7 +897,12 @@ def run_code(code, txt=None): mu.mem_map(RAM_BASE, RAM_SZ) mu.mem_write(RAM_BASE, initial_memory) # Run emulator - mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode)) + try: + mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode)) + except: + log.error("Failed to emulate code using unicorn engine") + log.error("Code") + log.error(SouceLine.write_multiline(code)) final_register_contents = {} for r in regs: diff --git a/slothy/helper.py b/slothy/helper.py index ddc53e4f..3e0e2c22 100644 --- a/slothy/helper.py +++ b/slothy/helper.py @@ -1146,10 +1146,40 @@ def parse_as_int(s): raise LLVM_Mc_Error(f"Could not find unambiguous text section in object file. Sections: {sections}") return sections_with_offsets[text_section[0]] + @staticmethod + def llvm_mc_output_extract_symbol(objfile, symbol): + """Extracts symbol from an objectfile emitted by llvm-mc""" + + # Feed object file through llvm-readobj + r = subprocess.run(["llvm-readobj", "-s", "-"], input=objfile, capture_output=True, check=True) + objfile_txt = r.stdout.decode().split("\n") + + # So we look for lines "Name: ..." and lines "Value: ...". + def parse_as_int(s): + if s.startswith("0x"): + return int(s, base=16) + else: + return int(s,base=10) + + symbols = filter(lambda l: l.strip().startswith("Name: "), objfile_txt) + symbols = list(map(lambda l: l.strip().removeprefix("Name: ").split(' ')[0].strip(), symbols)) + values = filter(lambda l: l.strip().startswith("Value: "), objfile_txt) + values = map(lambda l: parse_as_int(l.strip().removeprefix("Value: ")), values) + symbols_with_values = { s:val for (s,val) in zip(symbols, values) } + matching_symbols = list(filter(lambda s: s.endswith(symbol), symbols)) + # Sometimes assemble functions are named both `_foo` and `foo`, in which case we'd find + # multiple matching symbols -- however, they'd have the same value. Hence, only fail if + # there are multiple matching symbols of _different_ values. + if len({ symbols_with_values[s] for s in matching_symbols }) != 1: + raise LLVM_Mc_Error(f"Could not find unambiguous symbol {symbol} in object file. Symbols: {symbols}") + return symbols_with_values[matching_symbols[0]] + @staticmethod def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_paths=None): """Runs LLVM-MC tool to assemble `source`, returning byte code""" + thumb = "thumb" in arch or (attr is not None and "thumb" in attr) + # Unfortunately, there is no option to directly extract byte code # from LLVM-MC: One either gets a textual description, or an object file. # To not introduce another binary dependency, we just extract the byte @@ -1157,7 +1187,10 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa # has a "encoding: [byte0, byte1, ...]" comment at the end. if symbol is None: + if thumb is True: + source = [SourceLine(".thumb")] + source source = [SourceLine(".global harness"), + SourceLine(".type harness, %function"), SourceLine("harness:")] + source symbol = "harness" @@ -1167,8 +1200,13 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa source = CPreprocessor.unfold([], source, [], preprocessor, include=include_paths) except subprocess.CalledProcessError as exc: + log.error("CPreprocessor failed on the following input") + log.error(SouceLine.write_multiline(source)) raise LLVM_Mc_Error from exc + if platform.system() == "Darwin": + source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source)) + code = SourceLine.write_multiline(source) log.debug(f"Calling LLVM MC assmelber on the following code") @@ -1184,6 +1222,8 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa r = subprocess.run(["llvm-mc"] + args, input=code.encode(), capture_output=True, check=True) except subprocess.CalledProcessError as exc: + log.error("llvm-mc failed to handle the following code") + log.error(code) raise LLVM_Mc_Error from exc args = [f"--arch={arch}", "--assemble", "--filetype=obj"] @@ -1201,11 +1241,10 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa offset, sz = LLVM_Mc.llvm_mc_output_extract_text_section(objfile) code = objfile[offset:offset+sz] - # Extract symbol table - r = subprocess.run(["llvm-nm","-"], input=objfile, capture_output=True) - out = r.stdout.decode() - symbol = next(filter(lambda l: symbol in l, out.split("\n"))) - offset = int(symbol.split(" ")[0], base=16) + offset = LLVM_Mc.llvm_mc_output_extract_symbol(objfile, symbol) + + if platform.system() == "Darwin" and thumb is True: + offset += 1 return code, offset diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 7d8b3412..9e415ea6 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -13,11 +13,11 @@ from sympy import simplify llvm_mca_arch = "arm" -llvm_mc_arch = "arm" ### TODO: What to put here? -llvm_mc_attr = "armv5te,thumb2,dsp" ### TODO: What to put here? +llvm_mc_arch = "thumb" +llvm_mc_attr = "armv7e-m,thumb2,dsp,fpregs" unicorn_arch = UC_ARCH_ARM -unicorn_mode = UC_MODE_ARM +unicorn_mode = UC_MODE_THUMB | UC_MODE_MCLASS class RegisterType(Enum): GPR = 1