Skip to content

Commit

Permalink
Selftest: Add support for Armv7-M thumb
Browse files Browse the repository at this point in the history
Previously, the selftest would fail for Armv7-M when the code
under test uses both 16-bit and 32-bit instructions. This had
multiple reasons:
- Bad architecture and mode parameters for LLVM-MC and Unicorn
  in arch_v7m.py.
- Missing `.thumb` and `.type _, %function` in the kernels assembled
  by the local selftest, leading to unset low-bits indicating the
  type of the code.

A further issue existed on Mac: Even when assembling non-native
architectures, LLVM-MC on Mac still emits a Mach-O object format,
and Mach-O does not support the `.type` directive. To remedy this, the
`.type` directive is dynamically removed from kernels before they are
assembled, and the low bit of the function symbol addresses is
set manually for Thumb architectures.

Signed-off-by: Hanno Becker <[email protected]>
  • Loading branch information
hanno-becker committed Dec 11, 2024
1 parent 4c7b177 commit 4317ce7
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 11 deletions.
4 changes: 2 additions & 2 deletions examples/naive/armv7m/armv7m_simple0_func.s
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
.syntax unified
//.cpu cortex-m4 // llvm-mc does not like this...
//.thumb // unicorn seems to get confused by this...
.thumb // unicorn seems to get confused by this...

.align 2
.global my_func
// .type my_func, %function // llvm-mc does not like this...
.type my_func, %function
my_func:
push {r4-r11, lr}

Expand Down
7 changes: 6 additions & 1 deletion slothy/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,12 @@ def run_code(code, txt=None):
mu.mem_map(RAM_BASE, RAM_SZ)
mu.mem_write(RAM_BASE, initial_memory)
# Run emulator
mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode))
try:
mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode))
except:
log.error("Failed to emulate code using unicorn engine")
log.error("Code")
log.error(SouceLine.write_multiline(code))

final_register_contents = {}
for r in regs:
Expand Down
49 changes: 44 additions & 5 deletions slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,18 +1146,51 @@ def parse_as_int(s):
raise LLVM_Mc_Error(f"Could not find unambiguous text section in object file. Sections: {sections}")
return sections_with_offsets[text_section[0]]

@staticmethod
def llvm_mc_output_extract_symbol(objfile, symbol):
    """Extract the value of `symbol` from an object file emitted by llvm-mc.

    The object file is piped through `llvm-readobj -s -` and the textual
    symbol dump is scanned for "Name: ..." / "Value: ..." lines, which are
    paired up in order of appearance.

    Args:
        objfile: Raw bytes of the object file (passed to llvm-readobj on stdin).
        symbol: Name of the symbol to look up.

    Returns:
        The integer from the "Value:" line paired with the matching symbol.

    Raises:
        LLVM_Mc_Error: If no symbol matches, or if the matching symbols do
            not all share one value.
        subprocess.CalledProcessError: If llvm-readobj exits with a non-zero
            status (the call uses check=True).
    """

    # Feed object file through llvm-readobj
    r = subprocess.run(["llvm-readobj", "-s", "-"], input=objfile,
                       capture_output=True, check=True)
    lines = [line.strip() for line in r.stdout.decode().split("\n")]

    def parse_as_int(s):
        if s.startswith("0x"):
            return int(s, base=16)
        return int(s, base=10)

    # So we look for lines "Name: ..." and lines "Value: ...".
    # Only the first space-separated token after "Name: " is kept as the name.
    symbols = [line.removeprefix("Name: ").split(' ')[0].strip()
               for line in lines if line.startswith("Name: ")]
    values = [parse_as_int(line.removeprefix("Value: "))
              for line in lines if line.startswith("Value: ")]
    symbols_with_values = dict(zip(symbols, values))

    # Sometimes assembly functions are named both `_foo` and `foo`, in which
    # case we'd find multiple matching symbols -- however, they'd have the
    # same value. Prefer those exact spellings, so that an unrelated symbol
    # which merely ends in the requested name (say `other_foo`) cannot make
    # the lookup ambiguous.
    exact_names = (symbol, "_" + symbol)
    matching_symbols = [s for s in symbols if s in exact_names]
    if not matching_symbols:
        # No exact hit: fall back to plain suffix matching.
        matching_symbols = [s for s in symbols if s.endswith(symbol)]

    # Only fail if there is no match at all, or if there are multiple
    # matching symbols of _different_ values.
    if len({symbols_with_values[s] for s in matching_symbols}) != 1:
        raise LLVM_Mc_Error(f"Could not find unambiguous symbol {symbol} in object file. Symbols: {symbols}")
    return symbols_with_values[matching_symbols[0]]

@staticmethod
def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_paths=None):
"""Runs LLVM-MC tool to assemble `source`, returning byte code"""

thumb = "thumb" in arch or (attr is not None and "thumb" in attr)

# Unfortunately, there is no option to directly extract byte code
# from LLVM-MC: One either gets a textual description, or an object file.
# To not introduce another binary dependency, we just extract the byte
# code directly from the textual output, which for every assembly line
# has a "encoding: [byte0, byte1, ...]" comment at the end.

if symbol is None:
if thumb is True:
source = [SourceLine(".thumb")] + source
source = [SourceLine(".global harness"),
SourceLine(".type harness, %function"),
SourceLine("harness:")] + source
symbol = "harness"

Expand All @@ -1167,8 +1200,13 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa
source = CPreprocessor.unfold([], source, [], preprocessor,
include=include_paths)
except subprocess.CalledProcessError as exc:
log.error("CPreprocessor failed on the following input")
log.error(SouceLine.write_multiline(source))
raise LLVM_Mc_Error from exc

if platform.system() == "Darwin":
source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source))

code = SourceLine.write_multiline(source)

log.debug(f"Calling LLVM MC assmelber on the following code")
Expand All @@ -1184,6 +1222,8 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa
r = subprocess.run(["llvm-mc"] + args,
input=code.encode(), capture_output=True, check=True)
except subprocess.CalledProcessError as exc:
log.error("llvm-mc failed to handle the following code")
log.error(code)
raise LLVM_Mc_Error from exc

args = [f"--arch={arch}", "--assemble", "--filetype=obj"]
Expand All @@ -1201,11 +1241,10 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa
offset, sz = LLVM_Mc.llvm_mc_output_extract_text_section(objfile)
code = objfile[offset:offset+sz]

# Extract symbol table
r = subprocess.run(["llvm-nm","-"], input=objfile, capture_output=True)
out = r.stdout.decode()
symbol = next(filter(lambda l: symbol in l, out.split("\n")))
offset = int(symbol.split(" ")[0], base=16)
offset = LLVM_Mc.llvm_mc_output_extract_symbol(objfile, symbol)

if platform.system() == "Darwin" and thumb is True:
offset += 1

return code, offset

Expand Down
6 changes: 3 additions & 3 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from sympy import simplify

llvm_mca_arch = "arm"
llvm_mc_arch = "arm" ### TODO: What to put here?
llvm_mc_attr = "armv5te,thumb2,dsp" ### TODO: What to put here?
llvm_mc_arch = "thumb"
llvm_mc_attr = "armv7e-m,thumb2,dsp,fpregs"

unicorn_arch = UC_ARCH_ARM
unicorn_mode = UC_MODE_ARM
unicorn_mode = UC_MODE_THUMB | UC_MODE_MCLASS

class RegisterType(Enum):
GPR = 1
Expand Down

0 comments on commit 4317ce7

Please sign in to comment.