diff --git a/example.py b/example.py
index 9decb2e0..030f3b60 100644
--- a/example.py
+++ b/example.py
@@ -656,6 +656,24 @@ def core(self,slothy):
         slothy.config.inputs_are_outputs = True
         slothy.optimize(start="start", end="end")
 
+class Armv7mExample0Func(Example):  # example that additionally runs a function-level selftest
+    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
+        name = "armv7m_simple0_func"
+        infile = name
+        # An optional variant suffix is applied to both the input and the output name
+        if var != "":
+            name += f"_{var}"
+            infile += f"_{var}"
+        name += f"_{target_label_dict[target]}"
+        # Only the output name carries the target label; infile stays label-free
+        super().__init__(infile, name, rename=True, arch=arch, target=target)
+
+    def core(self,slothy):
+        slothy.config.variable_size=True
+        slothy.config.inputs_are_outputs = True
+        slothy.optimize(start="start", end="end")
+        slothy.global_selftest("my_func", {"r0": 1024 })  # r0 points to a 1024-byte buffer
+
 class Armv7mLoopSubs(Example):
     def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
         name = "loop_subs"
@@ -688,7 +706,7 @@ def core(self,slothy):
         slothy.config.variable_size=True
         slothy.config.outputs = ["r6"]
         slothy.optimize_loop("start")
-        
+
 class Armv7mLoopVmovCmp(Example):
     def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
         name = "loop_vmov_cmp"
@@ -720,7 +738,7 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
 
     def core(self,slothy):
         slothy.optimize()
-        
+
 class ntt_kyber_123_4567(Example):
     def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
         name = "ntt_kyber_123_4567"
@@ -744,6 +762,9 @@ def core(self, slothy):
         slothy.config.constraints.stalls_first_attempt = 64
         slothy.optimize_loop("layer123_start")
         slothy.optimize_loop("layer4567_start")
+        # Build + emulate entire function to test that behaviour has not changed
+        slothy.global_selftest("ntt_kyber_123_4567",
+                               {"x0": 1024, "x1": 1024, "x3": 1024, "x4": 1024, "x5": 1024})
 
 class intt_kyber_123_4567(Example):
     def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
@@ -1226,7 +1247,7 @@ def core(self, slothy):
         slothy.config.constraints.stalls_first_attempt = 110
         slothy.optimize_loop("layer123_start")
 
-        
+
 
 
 class ntt_dilithium_123(Example):
@@ -1349,7 +1370,7 @@ def core(self, slothy):
         slothy.optimize_loop("layer5678_start")
 
         slothy.config = conf.copy()
-        
+
         if self.timeout is not None:
             slothy.config.timeout = self.timeout // 12
 
@@ -1366,7 +1387,7 @@ def core(self, slothy):
         slothy.config.split_heuristic_stepsize = 0.1
         slothy.config.constraints.stalls_first_attempt = 14
         slothy.optimize_loop("layer1234_start")
-            
+
 
 class ntt_dilithium_1234(Example):
     def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA72):
@@ -1513,6 +1534,7 @@ def main():
 
                 # Armv7m examples
                  Armv7mExample0(),
+                 Armv7mExample0Func(),
 
                 # Loop examples
                  AArch64LoopSubs(),
diff --git a/examples/naive/aarch64/ntt_kyber_123_4567.s b/examples/naive/aarch64/ntt_kyber_123_4567.s
index 0f2c9ae1..a92b887b 100644
--- a/examples/naive/aarch64/ntt_kyber_123_4567.s
+++ b/examples/naive/aarch64/ntt_kyber_123_4567.s
@@ -23,8 +23,13 @@
 /// SOFTWARE.
 ///
 
+// Commented out for simple standalone emulation not
+// requiring correct constant data
+//
+// Must be uncommented again for real (non-emulated) use.
+//
 // Needed to provide ASM_LOAD directive
-#include <hal_env.h>
+// #include <hal_env.h>
 
 .macro mulmodq dst, src, const, idx0, idx1
         sqrdmulh t2.8h,   \src\().8h, \const\().h[\idx1]
@@ -154,7 +159,12 @@
 .data
 .p2align 4
 roots:
-        #include "ntt_kyber_123_45_67_twiddles.s"
+// Commented out for simple standalone emulation not
+// requiring correct constant data
+//
+// Must be uncommented again for real (non-emulated) use.
+//
+//        #include "ntt_kyber_123_45_67_twiddles.s"
 
         in      .req x0
         inp     .req x1
@@ -223,9 +233,14 @@ ntt_kyber_123_4567:
 _ntt_kyber_123_4567:
         push_stack
 
-        ASM_LOAD(r_ptr0, roots)
-        ASM_LOAD(r_ptr1, roots_l56)
-        ASM_LOAD(xtmp, const_addr)
+// Commented out for simple standalone emulation not
+// requiring correct constant data.
+//
+// Must be uncommented again for real (non-emulated) use.
+//
+//        ASM_LOAD(r_ptr0, roots)
+//        ASM_LOAD(r_ptr1, roots_l56)
+//        ASM_LOAD(xtmp, const_addr)
 
         ld1 {consts.8h}, [xtmp]
 
diff --git a/slothy/core/config.py b/slothy/core/config.py
index 1140a2bb..b14e69f2 100644
--- a/slothy/core/config.py
+++ b/slothy/core/config.py
@@ -122,8 +122,8 @@ def selftest(self):
           equivalence-check the loop-form (including the compare+branch instructions
           at the loop boundary) rather than the unrolled code.
 
-        DEPENDENCY: To run this, you need `llvm-mc` the binary in your path or configured
-        as via `llvm_mc_binary`, and `unicorn-engine` Python bindings setup.
+        DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc` in your PATH
+                    (part of a standard LLVM setup) and the `unicorn-engine` Python bindings.
 
         NOTE: This is so far implemented as a repeated randomized test -- nothing clever.
         """
@@ -469,21 +469,6 @@ def compiler_include_paths(self):
         or `with_llvm_mca_after` are set."""
         return self._compiler_include_paths
 
-    @property
-    def llvm_mca_binary(self):
-        """The llvm-mca binary to be used for estimated performance annotations
-
-        This is only relevant if `with_llvm_mca_before` or `with_llvm_mca_after`
-        is set."""
-        return self._llvm_mca_binary
-
-    @property
-    def llvm_mc_binary(self):
-        """The llvm-mc binary to be used for assembling output data
-
-        This is only relevant if `selftest` is set."""
-        return self._llvm_mc_binary
-
     @property
     def timeout(self):
         """The timeout in seconds after which the underlying constraint solver stops
@@ -1228,8 +1213,6 @@ def __init__(self, Arch, Target):
 
         self._compiler_binary = "gcc"
         self._compiler_include_paths = None
-        self._llvm_mca_binary = "llvm-mca"
-        self._llvm_mc_binary = "llvm-mc"
 
         self.keep_tags = True
         self.inherit_macro_comments = False
@@ -1377,12 +1360,6 @@ def compiler_binary(self, val):
     @compiler_include_paths.setter
     def compiler_include_paths(self, val):
         self._compiler_include_paths = val
-    @llvm_mca_binary.setter
-    def llvm_mca_binary(self, val):
-        self._llvm_mca_binary = val
-    @llvm_mc_binary.setter
-    def llvm_mc_binary(self, val):
-        self._llvm_mc_binary = val
     @timeout.setter
     def timeout(self, val):
         self._timeout = val
diff --git a/slothy/core/core.py b/slothy/core/core.py
index 8f4bcff1..d635db28 100644
--- a/slothy/core/core.py
+++ b/slothy/core/core.py
@@ -877,11 +877,10 @@ def selftest(self, log):
             self._config.arch.RegisterType.list_registers(ty)]
 
         def run_code(code, txt=None):
-            objcode = LLVM_Mc.assemble(code, self._config.llvm_mc_binary,
+            objcode, offset = LLVM_Mc.assemble(code,
                                        self._config.arch.llvm_mc_arch,
                                        self._config.arch.llvm_mc_attr,
                                        log)
-
             # Setup emulator
             mu = Uc(self.config.arch.unicorn_arch, self.config.arch.unicorn_mode)
             # Copy initial register contents into emulator
@@ -937,7 +936,7 @@ def run_code(code, txt=None):
                 if final_regs_old[r] != final_regs_new[r]:
                     raise SlothySelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")
 
-        log.info("Selftest: OK")
+        log.info("Local selftest: OK")
 
     def selfcheck_with_fixup(self, log):
         """Do selfcheck, and consider preamble/postamble fixup in case of SW pipelining
diff --git a/slothy/core/slothy.py b/slothy/core/slothy.py
index 76cbc1ae..dcd2e33e 100644
--- a/slothy/core/slothy.py
+++ b/slothy/core/slothy.py
@@ -45,6 +45,7 @@
 This module provides the Slothy class, which is a stateful interface to both
 one-shot and heuristic optimiations using SLOTHY."""
 
+import os
 import logging
 from types import SimpleNamespace
 
@@ -54,7 +55,16 @@
 from slothy.core.heuristics import Heuristics
 from slothy.helper import CPreprocessor, SourceLine
 from slothy.helper import AsmAllocation, AsmMacro, AsmHelper, AsmIfElse
-from slothy.helper import CPreprocessor, LLVM_Mca, LLVM_Mca_Error
+from slothy.helper import CPreprocessor, LLVM_Mca, LLVM_Mc, LLVM_Mca_Error
+
+try:
+    from unicorn import *
+    from unicorn.arm64_const import *
+except ImportError:
+    Uc = None
+
+class SlothyGlobalSelfTestException(Exception):
+    """Exception thrown upon global selftest failures (raised by Slothy.global_selftest)"""
 
 class Slothy:
     """SLOTHY optimizer
@@ -87,6 +97,7 @@ def __init__(self, arch, target, logger=None):
 
         # The source, once loaded, is represented as a list of strings
         self._source = None
+        self._original_source = None
         self.results = None
 
         self.last_result = None
@@ -99,21 +110,40 @@ def source(self):
         If you want the current source code as a multiline string, use get_source_as_string()."""
         return self._source
 
+    @property
+    def original_source(self):
+        """Returns the original source code as an array of SourceLine objects
+
+        If you want the original source code as a multiline string, use get_original_source_as_string()."""
+        return self._original_source
+
     @source.setter
     def source(self, val):
         assert SourceLine.is_source(val)
         self._source = val
 
+    @original_source.setter
+    def original_source(self, val):
+        assert SourceLine.is_source(val)  # reject anything that is not a valid source representation
+        self._original_source = val
+
     def get_source_as_string(self, comments=True, indentation=True, tags=True):
         """Retrieve current source code as multi-line string"""
         return SourceLine.write_multiline(self.source, comments=comments,
             indentation=indentation, tags=tags)
 
+    def get_original_source_as_string(self, comments=True, indentation=True, tags=True):
+        """Retrieve original source code (cf. `original_source`) as multi-line string"""
+        return SourceLine.write_multiline(self.original_source, comments=comments,
+            indentation=indentation, tags=tags)
+
     def set_source_as_string(self, s):
         """Provide input source code as multi-line string"""
         assert isinstance(s, str)
         reduce = not self.config.ignore_tags
         self.source = SourceLine.read_multiline(s, reduce=reduce)
+        if self.original_source is None:  # only the first source ever set is kept as the original
+            self.original_source = self.source  # NOTE(review): aliases self.source (no copy) -- confirm
 
     def load_source_raw(self, source):
         """Load source code from multi-line string"""
@@ -145,6 +175,114 @@ def _dump(name, s, logger, err=False):
         for l in s:
             fun(f"> {l}")
 
+    def global_selftest(self, funcname, address_gprs, iterations=5):
+        """Conduct a function-level selftest
+
+        - funcname: Name of function to be called. Must be exposed as a symbol
+        - address_gprs: Dictionary indicating which GPRs are pointers to buffers of which size.
+            For example, `{ "x0": 1024, "x4": 1024 }` would indicate that both x0 and x4
+            point to buffers of size 1024 bytes. The global selftest needs to know this to
+            setup valid calls to the assembly routine.
+        - iterations: Number of randomized runs comparing original and current source.
+
+        Raises SlothyGlobalSelfTestException on missing unicorn-engine, oversized buffers, or mismatch.
+
+        DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc`
+                    in your PATH. Those are part of a standard LLVM setup.
+        """
+        log = self.logger.getChild(f"global_selftest_{funcname}")
+
+        if Uc is None:
+            raise SlothyGlobalSelfTestException("Cannot run selftest -- unicorn-engine is not available.")
+
+        if self.config.arch.unicorn_arch is None or self.config.arch.llvm_mc_arch is None:
+            log.warning("Selftest not supported on target architecture")
+            return
+
+        old_source = self.original_source
+        new_source = self.source
+
+        CODE_BASE = 0x010000
+        CODE_SZ = 0x010000
+        CODE_END = CODE_BASE + CODE_SZ
+        RAM_BASE = 0x030000
+        RAM_SZ = 0x010000
+        STACK_BASE = 0x040000
+        STACK_SZ = 0x010000
+        STACK_TOP = STACK_BASE + STACK_SZ
+
+        # Buffers are packed back to back from RAM_BASE; reject layouts that would leave RAM.
+        if sum(address_gprs.values()) > RAM_SZ:
+            raise SlothyGlobalSelfTestException("Cannot run selftest -- buffers do not fit into emulated RAM.")
+
+        regs = [r for ty in self.config.arch.RegisterType for r in \
+            self.config.arch.RegisterType.list_registers(ty)]
+
+        def run_code(code, txt=None):
+            objcode, offset = LLVM_Mc.assemble(code,
+                                       self.config.arch.llvm_mc_arch,
+                                       self.config.arch.llvm_mc_attr,
+                                       log, symbol=funcname,
+                                       preprocessor=self.config.compiler_binary,
+                                       include_paths=self.config.compiler_include_paths)
+            # Setup emulator
+            mu = Uc(self.config.arch.unicorn_arch, self.config.arch.unicorn_mode)
+            # Copy initial register contents into emulator
+            for r,v in initial_register_contents.items():
+                ur = self.config.arch.RegisterType.unicorn_reg_by_name(r)
+                if ur is None:
+                    continue
+                mu.reg_write(ur, v)
+            # Put a valid address in the LR that serves as the marker to terminate emulation
+            mu.reg_write(self.config.arch.RegisterType.unicorn_link_register(), CODE_END)
+            # Setup stack
+            mu.reg_write(self.config.arch.RegisterType.unicorn_stack_pointer(), STACK_TOP)
+            # Copy code into emulator
+            mu.mem_map(CODE_BASE, CODE_SZ)
+            mu.mem_write(CODE_BASE, objcode)
+            # Copy initial memory contents into emulator
+            mu.mem_map(RAM_BASE, RAM_SZ)
+            mu.mem_write(RAM_BASE, initial_memory)
+            # Setup stack
+            mu.mem_map(STACK_BASE, STACK_SZ)
+            mu.mem_write(STACK_BASE, initial_stack)
+            # Run emulator
+            mu.emu_start(CODE_BASE + offset, CODE_END)
+            final_register_contents = {}
+            for r in regs:
+                ur = self.config.arch.RegisterType.unicorn_reg_by_name(r)
+                if ur is None:
+                    continue
+                final_register_contents[r] = mu.reg_read(ur)
+            final_memory_contents = mu.mem_read(RAM_BASE, RAM_SZ)
+            return final_register_contents, final_memory_contents
+
+        for _ in range(iterations):
+            initial_memory = os.urandom(RAM_SZ)
+            initial_stack = os.urandom(STACK_SZ)
+            cur_ram = RAM_BASE
+            # Set initial register contents arbitrarily, except for registers which must
+            # hold valid memory addresses. (Explicit byteorder: mandatory before Python 3.11.)
+            initial_register_contents = {r: int.from_bytes(os.urandom(16), "big") for r in regs}
+            for (reg, sz) in address_gprs.items():
+                initial_register_contents[reg] = cur_ram
+                cur_ram += sz
+
+            final_regs_old, final_mem_old = run_code(old_source, txt="old")
+            final_regs_new, final_mem_new = run_code(new_source, txt="new")
+
+            # Check if memory contents are the same
+            if final_mem_old != final_mem_new:
+                raise SlothyGlobalSelfTestException("Selftest failed: Memory mismatch")
+
+            # Check that callee-saved registers are the same
+            regs_expected = self.config.arch.RegisterType.callee_saved_registers()
+            for r in regs_expected:
+                if final_regs_old[r] != final_regs_new[r]:
+                    raise SlothyGlobalSelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")
+
+        log.info(f"Global selftest for {funcname}: OK")
+
     #
     # Stateful wrappers around heuristics
     #
diff --git a/slothy/helper.py b/slothy/helper.py
index 020ea66d..354ea3ab 100644
--- a/slothy/helper.py
+++ b/slothy/helper.py
@@ -27,6 +27,7 @@
 
 import re
 import subprocess
+import platform
 import logging
 from abc import ABC, abstractmethod
 from sympy import simplify
@@ -1085,11 +1086,69 @@ class LLVM_Mc():
     """Helper class for the application of the LLVM MC tool"""
 
     @staticmethod
-    def assemble(source, mc_binary, arch, attr, log):
-        """Runs LLVM-MC tool to assemble `source`, returning byte code"""
+    def llvm_mc_output_extract_text_section(objfile):
+        """Extracts offset and size of .text section from an objectfile
+        emitted by llvm-mc."""
+
+        # We use llvm-readobj to inspect the objectfile, which works
+        # for both ELF and MachOS object files. Unfortunately, however,
+        # the output formats of both tools are not the same. Moreovoer,
+        # the output when selecting JSON as the output format, is not valid JSON.
+        # So we're left to hacky string munging.
+
+        # Feed object file through llvm-readobj
+        r = subprocess.run(["llvm-readobj", "-S", "-"], input=objfile, capture_output=True, check=True)
+        objfile_txt = r.stdout.decode().split("\n")
+
+        # We expect something like this here
+        # ```
+        # File: test.o
+        # Format: Mach-O arm
+        # Arch: arm
+        # AddressSize: 32bit
+        # Sections [
+        #   Section {
+        #     Index: 0
+        #     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+        #     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+        #     Address: 0x0
+        #     Size: 0x4
+        #     Offset: 176
+        #     Alignment: 0
+        #     RelocationOffset: 0x0
+        #     RelocationCount: 0
+        #     Type: Regular (0x0)
+        #     Attributes [ (0x800004)
+        #       PureInstructions (0x800000)
+        #       SomeInstructions (0x4)
+        #     ]
+        #     Reserved1: 0x0
+        #     Reserved2: 0x0
+        #   }
+        # ]
+        # ```
+        # So we look for lines "Name: __text" and lines "Offset: ...".
+        def parse_as_int(s):
+            if s.startswith("0x"):
+                return int(s, base=16)
+            else:
+                return int(s,base=10)
+
+        sections = filter(lambda l: l.strip().startswith("Name: "), objfile_txt)
+        sections = list(map(lambda l: l.strip().removeprefix("Name: ").split(' ')[0].strip(), sections))
+        offsets = filter(lambda l: l.strip().startswith("Offset: "), objfile_txt)
+        offsets = map(lambda l: parse_as_int(l.strip().removeprefix("Offset: ")), offsets)
+        sizes = filter(lambda l: l.strip().startswith("Size: "), objfile_txt)
+        sizes = map(lambda l: parse_as_int(l.strip().removeprefix("Size: ")), sizes)
+        sections_with_offsets = { s:(o,sz) for (s,o,sz) in zip(sections, offsets, sizes) }
+        text_section = list(filter(lambda s: "text" in s, sections))
+        if len(text_section) != 1:
+            raise LLVM_Mc_Error(f"Could not find unambiguous text section in object file. Sections: {sections}")
+        return sections_with_offsets[text_section[0]]
 
-        LLVM_MCA_BEGIN = SourceLine("").add_comment("LLVM-MCA-BEGIN")
-        LLVM_MCA_END = SourceLine("").add_comment("LLVM-MCA-END")
+    @staticmethod
+    def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_paths=None):
+        """Runs LLVM-MC tool to assemble `source`, returning byte code"""
 
         # Unfortunately, there is no option to directly extract byte code
         # from LLVM-MC: One either gets a textual description, or an object file.
@@ -1097,29 +1156,45 @@ def assemble(source, mc_binary, arch, attr, log):
         # code directly from the textual output, which for every assembly line
         # has a "encoding: [byte0, byte1, ...]" comment at the end.
 
+        if symbol is None:
+            source = [SourceLine(".global harness"),
+                      SourceLine("harness:")] + source
+            symbol = "harness"
+
+        if preprocessor is not None:
+            # First, run the C preprocessor on the code
+            try:
+                source = CPreprocessor.unfold([], source, [], preprocessor,
+                                              include=include_paths)
+            except subprocess.CalledProcessError as exc:
+                raise LLVM_Mc_Error from exc
+
         code = SourceLine.write_multiline(source)
+
         log.debug(f"Calling LLVM MC assmelber on the following code")
         log.debug(code)
-        args = [f"--arch={arch}", "--assemble", "--show-encoding"]
+        args = [f"--arch={arch}", "--assemble", "--filetype=obj"]
         if attr is not None:
             args.append(f"--mattr={attr}")
         try:
-            r = subprocess.run([mc_binary] + args,
-                               input=code, text=True, capture_output=True, check=True)
+            r = subprocess.run(["llvm-mc"] + args,
+                               input=code.encode(), capture_output=True, check=True)
         except subprocess.CalledProcessError as exc:
             raise LLVM_Mc_Error from exc
 
-        res = r.stdout.split('\n')
-        res = filter(lambda s: "encoding:" in s, res)
-        res = list(map(lambda s: s.split("encoding:")[1].strip(), res))
-
-        # Every line has the form "[byte, byte, byte,...]" now -- interpret as byte array
-        # Bit hacky, but nevermind...
-        def string_as_byte_array(s):
-            return s.replace("[", "").replace("]", "").split(",")
-        res = list(map(string_as_byte_array, res))
-        res = [int(b, base=16) for l in res for b in l] # Flatten
-        return bytes(res)
+        # TODO: If there are relocations remaining, we should fail at this point
+
+        objfile = r.stdout
+        offset, sz = LLVM_Mc.llvm_mc_output_extract_text_section(objfile)
+        code = objfile[offset:offset+sz]
+
+        # Extract symbol table
+        r = subprocess.run(["llvm-nm","-"], input=objfile, capture_output=True)
+        out = r.stdout.decode()
+        symbol = next(filter(lambda l: symbol in l, out.split("\n")))
+        offset = int(symbol.split(" ")[0], base=16)
+
+        return code, offset
 
 class LLVM_Mca_Error(Exception):
     """Exception thrown if llvm-mca subprocess fails"""
diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py
index 9547e4ac..e6fac7cf 100644
--- a/slothy/targets/aarch64/aarch64_neon.py
+++ b/slothy/targets/aarch64/aarch64_neon.py
@@ -81,6 +81,18 @@ def __repr__(self):
     def spillable(reg_type):
         return reg_type in [RegisterType.GPR, RegisterType.NEON]
 
+    @staticmethod
+    def callee_saved_registers():  # returns the names x18..x30 followed by v8..v15
+        return [f"x{i}" for i in range(18,31)] + [f"v{i}" for i in range(8,16)]  # NOTE(review): AAPCS64 names x19-x28 and only the low 64 bits of v8-v15 -- confirm this wider set is intended
+
+    @staticmethod
+    def unicorn_link_register():
+        return UC_ARM64_REG_X30  # unicorn id of x30, used here as the link register
+
+    @staticmethod
+    def unicorn_stack_pointer():
+        return UC_ARM64_REG_SP  # unicorn id of the stack pointer register
+
     @cache
     @staticmethod
     def unicorn_reg_by_name(reg):
diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py
index aa9bf276..216b18cc 100644
--- a/slothy/targets/arm_v7m/arch_v7m.py
+++ b/slothy/targets/arm_v7m/arch_v7m.py
@@ -13,11 +13,11 @@
 from sympy import simplify
 
 llvm_mca_arch = "arm"
-llvm_mc_arch = "arm"
-llvm_mc_attr = "armv5te"
+llvm_mc_arch = "arm" ### TODO: What to put here?
+llvm_mc_attr = "armv5te,thumb2,dsp" ### TODO: What to put here?
 
 unicorn_arch = UC_ARCH_ARM
-unicorn_mode = UC_MODE_ARM
+unicorn_mode = UC_MODE_THUMB
 
 class RegisterType(Enum):
     GPR = 1
@@ -35,6 +35,18 @@ def __repr__(self):
     def spillable(reg_type):
         return reg_type in [RegisterType.GPR]
 
+    @staticmethod
+    def callee_saved_registers():  # returns the names r4..r11 followed by s0..s15
+        return [f"r{i}" for i in range(4,12)] + [f"s{i}" for i in range(0,16)]  # NOTE(review): AAPCS32 lists s16-s31 (not s0-s15) as callee-saved -- confirm the FP range
+
+    @staticmethod
+    def unicorn_link_register():
+        return UC_ARM_REG_LR  # unicorn id of the link register
+
+    @staticmethod
+    def unicorn_stack_pointer():
+        return UC_ARM_REG_SP  # unicorn id of the stack pointer register
+
     @cache
     @staticmethod
     def unicorn_reg_by_name(reg):