Skip to content

Commit

Permalink
Implement global self test
Browse files Browse the repository at this point in the history
This commit prototypes an implementation of a 'global' selftest,
building and emulating the entire source code loaded into SLOTHY
and checking that input and output behave equivalently.

To use, call `slothy.global_selftest(...)` at any point during
stateful SLOTHY operations. Then, the global selftest will compare
the current state of the code with the original state.

See ntt_kyber_123_4567 for an example.
  • Loading branch information
hanno-becker committed Dec 9, 2024
1 parent f6c6404 commit a359d3f
Show file tree
Hide file tree
Showing 9 changed files with 357 additions and 67 deletions.
32 changes: 27 additions & 5 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,24 @@ def core(self,slothy):
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")

class Armv7mExample0Func(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "armv7m_simple0_func"
infile = name

if var != "":
name += f"_{var}"
infile += f"_{var}"
name += f"_{target_label_dict[target]}"

super().__init__(infile, name, rename=True, arch=arch, target=target)

def core(self,slothy):
slothy.config.variable_size=True
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")
slothy.global_selftest("my_func", {"r0": 1024 })

class Armv7mLoopSubs(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_subs"
Expand Down Expand Up @@ -688,7 +706,7 @@ def core(self,slothy):
slothy.config.variable_size=True
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")

class Armv7mLoopVmovCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_vmov_cmp"
Expand Down Expand Up @@ -720,7 +738,7 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):

def core(self,slothy):
slothy.optimize()

class ntt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
name = "ntt_kyber_123_4567"
Expand All @@ -744,6 +762,9 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 64
slothy.optimize_loop("layer123_start")
slothy.optimize_loop("layer4567_start")
# Build + emulate entire function to test that behaviour has not changed
slothy.global_selftest("ntt_kyber_123_4567",
{"x0": 1024, "x1": 1024, "x3": 1024, "x4": 1024, "x5": 1024})

class intt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
Expand Down Expand Up @@ -1226,7 +1247,7 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 110
slothy.optimize_loop("layer123_start")




class ntt_dilithium_123(Example):
Expand Down Expand Up @@ -1349,7 +1370,7 @@ def core(self, slothy):
slothy.optimize_loop("layer5678_start")

slothy.config = conf.copy()

if self.timeout is not None:
slothy.config.timeout = self.timeout // 12

Expand All @@ -1366,7 +1387,7 @@ def core(self, slothy):
slothy.config.split_heuristic_stepsize = 0.1
slothy.config.constraints.stalls_first_attempt = 14
slothy.optimize_loop("layer1234_start")


class ntt_dilithium_1234(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA72):
Expand Down Expand Up @@ -1513,6 +1534,7 @@ def main():

# Armv7m examples
Armv7mExample0(),
Armv7mExample0Func(),

# Loop examples
AArch64LoopSubs(),
Expand Down
25 changes: 20 additions & 5 deletions examples/naive/aarch64/ntt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
/// SOFTWARE.
///

// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be commented when used.
//
// Needed to provide ASM_LOAD directive
#include <hal_env.h>
// #include <hal_envh>

.macro mulmodq dst, src, const, idx0, idx1
sqrdmulh t2.8h, \src\().8h, \const\().h[\idx1]
Expand Down Expand Up @@ -154,7 +159,12 @@
.data
.p2align 4
roots:
#include "ntt_kyber_123_45_67_twiddles.s"
// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be commented when used.
//
// #include "ntt_kyber_123_45_67_twiddles.s"

in .req x0
inp .req x1
Expand Down Expand Up @@ -223,9 +233,14 @@ ntt_kyber_123_4567:
_ntt_kyber_123_4567:
push_stack

ASM_LOAD(r_ptr0, roots)
ASM_LOAD(r_ptr1, roots_l56)
ASM_LOAD(xtmp, const_addr)
// Commented out for simple standalone emulation not
// requiring correct constant data.
//
// Should be commented when used.
//
// ASM_LOAD(r_ptr0, roots)
// ASM_LOAD(r_ptr1, roots_l56)
// ASM_LOAD(xtmp, const_addr)

ld1 {consts.8h}, [xtmp]

Expand Down
20 changes: 20 additions & 0 deletions examples/naive/armv7m/armv7m_simple0_func.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.syntax unified
//.cpu cortex-m4 // llvm-mc does not like this...
//.thumb // unicorn seems to get confused by this...

.align 2
.global my_func
// .type my_func, %function // llvm-mc does not like this...
my_func:
push {r4-r11, lr}

start:
ldr r8, [r0, #4]
add r8, r2, r8
eor.w r8, r8, r3
smlabt r3, r2, r2, r8
asrs r3, r3, #1
str r3, [r0, #4]
end:

pop {r4-r11, pc}
39 changes: 8 additions & 31 deletions slothy/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def selftest(self):
equivalence-check the loop-form (including the compare+branch instructions
at the loop boundary) rather than the unrolled code.
DEPENDENCY: To run this, you need `llvm-mc` the binary in your path or configured
as via `llvm_mc_binary`, and `unicorn-engine` Python bindings setup.
DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc`
in your PATH. Those are part of a standard LLVM setup.
NOTE: This is so far implemented as a repeated randomized test -- nothing clever.
"""
Expand Down Expand Up @@ -416,8 +416,8 @@ def with_llvm_mca(self):
"""Indicates whether LLVM MCA should be run prior and after optimization
to obtain approximate performance data based on LLVM's scheduling models.
If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_before and self._with_llvm_mca_after

Expand All @@ -438,8 +438,8 @@ def with_llvm_mca_before(self):
"""Indicates whether LLVM MCA should be run prior to optimization
to obtain approximate performance data based on LLVM's scheduling models.
If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_before

Expand All @@ -448,8 +448,8 @@ def with_llvm_mca_after(self):
"""Indicates whether LLVM MCA should be run after optimization
to obtain approximate performance data based on LLVM's scheduling models.
If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_after

Expand All @@ -469,21 +469,6 @@ def compiler_include_paths(self):
or `with_llvm_mca_after` are set."""
return self._compiler_include_paths

@property
def llvm_mca_binary(self):
"""The llvm-mca binary to be used for estimated performance annotations
This is only relevant if `with_llvm_mca_before` or `with_llvm_mca_after`
is set."""
return self._llvm_mca_binary

@property
def llvm_mc_binary(self):
"""The llvm-mc binary to be used for assembling output data
This is only relevant if `selftest` is set."""
return self._llvm_mc_binary

@property
def timeout(self):
"""The timeout in seconds after which the underlying constraint solver stops
Expand Down Expand Up @@ -1228,8 +1213,6 @@ def __init__(self, Arch, Target):

self._compiler_binary = "gcc"
self._compiler_include_paths = None
self._llvm_mca_binary = "llvm-mca"
self._llvm_mc_binary = "llvm-mc"

self.keep_tags = True
self.inherit_macro_comments = False
Expand Down Expand Up @@ -1377,12 +1360,6 @@ def compiler_binary(self, val):
@compiler_include_paths.setter
def compiler_include_paths(self, val):
self._compiler_include_paths = val
@llvm_mca_binary.setter
def llvm_mca_binary(self, val):
self._llvm_mca_binary = val
@llvm_mc_binary.setter
def llvm_mc_binary(self, val):
self._llvm_mc_binary = val
@timeout.setter
def timeout(self, val):
self._timeout = val
Expand Down
10 changes: 5 additions & 5 deletions slothy/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
from slothy.helper import LockAttributes, Permutation, DeferHandler, SourceLine, LLVM_Mc

try:
from unicorn import Uc
from unicorn import *
from unicorn.arm_const import *
except ImportError:
Uc = None

Expand Down Expand Up @@ -877,11 +878,10 @@ def selftest(self, log):
self._config.arch.RegisterType.list_registers(ty)]

def run_code(code, txt=None):
objcode = LLVM_Mc.assemble(code, self._config.llvm_mc_binary,
objcode, offset = LLVM_Mc.assemble(code,
self._config.arch.llvm_mc_arch,
self._config.arch.llvm_mc_attr,
log)

# Setup emulator
mu = Uc(self.config.arch.unicorn_arch, self.config.arch.unicorn_mode)
# Copy initial register contents into emulator
Expand All @@ -897,7 +897,7 @@ def run_code(code, txt=None):
mu.mem_map(RAM_BASE, RAM_SZ)
mu.mem_write(RAM_BASE, initial_memory)
# Run emulator
mu.emu_start(CODE_BASE, CODE_BASE + len(objcode))
mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode))

final_register_contents = {}
for r in regs:
Expand Down Expand Up @@ -937,7 +937,7 @@ def run_code(code, txt=None):
if final_regs_old[r] != final_regs_new[r]:
raise SlothySelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")

log.info("Selftest: OK")
log.info("Local selftest: OK")

def selfcheck_with_fixup(self, log):
"""Do selfcheck, and consider preamble/postamble fixup in case of SW pipelining
Expand Down
Loading

0 comments on commit a359d3f

Please sign in to comment.