Skip to content

Commit

Permalink
Merge pull request #126 from slothy-optimizer/minor
Browse files Browse the repository at this point in the history
Improve debuggability of selftest, expand AArch64 model
  • Loading branch information
hanno-becker authored Dec 17, 2024
2 parents 7223d66 + f1fef46 commit cbd97a1
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 35 deletions.
11 changes: 11 additions & 0 deletions slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,15 @@ def run_code(code, txt=None):

return final_register_contents, final_memory_contents

def failure_dump():
    """Log diagnostics for a failed selftest at error level.

    Emits, in order: a failure banner, the input code (``codeA``), the
    output code (``codeB``) and ``output_registers``. None of these names
    (nor ``log``) are defined in this function; they are resolved from the
    surrounding scope. The visible call sites invoke this helper immediately
    before raising ``SelfTestException`` on a memory or register mismatch.
    """
    log.error("Selftest failed")
    log.error("Input code:")
    # Render the instruction list as one multi-line string for the log.
    log.error(SourceLine.write_multiline(codeA))
    log.error("Output code:")
    log.error(SourceLine.write_multiline(codeB))
    log.error("Output registers:")
    log.error(output_registers)

for _ in range(iterations):
initial_memory = os.urandom(RAM_SZ)
initial_stack = os.urandom(STACK_SZ)
Expand All @@ -1370,6 +1379,7 @@ def run_code(code, txt=None):

# Check if memory contents are the same
if final_mem_old != final_mem_new:
failure_dump()
raise SelfTestException(f"Selftest failed: Memory mismatch")

# Check that callee-saved registers are the same
Expand All @@ -1378,6 +1388,7 @@ def run_code(code, txt=None):
if r.startswith("hint_"):
continue
if final_regs_old[r] != final_regs_new[r]:
failure_dump()
raise SelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")

if fnsym is None:
Expand Down
9 changes: 6 additions & 3 deletions slothy/targets/aarch64/aarch64_big_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def get_min_max_objective(slothy):
(vand, vadd) : ExecutionUnit.V(),
(vxtn) : ExecutionUnit.V(),
veor3 : ExecutionUnit.V(),
(vshl, vshl_d, vshli, vshrn) : ExecutionUnit.V1(),
(VShiftImmediateBasic,
vshl_d, vshli, vshrn) : ExecutionUnit.V1(), # TODO: Should be V13?
vusra : ExecutionUnit.V1(),
AESInstruction : ExecutionUnit.V(),
Transpose : ExecutionUnit.V(),
Expand Down Expand Up @@ -141,7 +142,8 @@ def get_min_max_objective(slothy):
AArch64NeonLogical : 1,
(vmovi) : 1,
(vxtn) : 1,
(vshl, vshl_d, vshli, vshrn) : 1,
(VShiftImmediateBasic,
vshl_d, vshli, vshrn) : 1,
(vmul) : 2,
vusra : 1,
(vmlal, vmull) : 1,
Expand Down Expand Up @@ -180,7 +182,8 @@ def get_min_max_objective(slothy):
(vmul) : 5,
vusra : 4, # TODO: Add fwd path
(vmlal, vmull) : 4, # TODO: Add fwd path
(vshl, vshl_d, vshli, vshrn) : 2,
(VShiftImmediateBasic,
vshl_d, vshli, vshrn) : 2,
(AArch64BasicArithmetic,
AArch64ConditionalSelect,
AArch64ConditionalCompare,
Expand Down
34 changes: 25 additions & 9 deletions slothy/targets/aarch64/aarch64_neon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2528,12 +2528,33 @@ class vsmlal2(Vmlal): # pylint: disable=missing-docstring,invalid-name
inputs = ["Va", "Vb"]
in_outs=["Vd"]

class vsrshr(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name
class VShiftImmediateBasic(AArch64Instruction):
    """Grouping base class for plain Neon shift-by-immediate instructions.

    Defines nothing itself. The `sshr`, `ushr` and `shl` instruction classes
    derive from it so that target models can key execution-unit, throughput
    and latency entries on the whole group instead of listing each
    instruction individually.
    """
    pass

class VShiftImmediateRounding(AArch64Instruction):
    """Grouping base class for rounding Neon shift-by-immediate instructions.

    Defines nothing itself. The `srshr` and `urshr` instruction classes
    derive from it so that target models can key execution-unit, throughput
    and latency entries on the whole group instead of listing each
    instruction individually.
    """
    pass

class vsrshr(VShiftImmediateRounding): # pylint: disable=missing-docstring,invalid-name
    """Neon `srshr` (signed rounding shift right by immediate).

    Reads vector register `Va` and writes vector register `Vd`.
    """
    pattern = "srshr <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]

class vshl(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name
class vurshr(VShiftImmediateRounding): # pylint: disable=missing-docstring,invalid-name
    """Neon `urshr` (unsigned rounding shift right by immediate).

    Reads vector register `Va` and writes vector register `Vd`.
    """
    pattern = "urshr <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]

class vsshr(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name
    """Neon `sshr` (signed shift right by immediate).

    Reads vector register `Va` and writes vector register `Vd`.
    """
    pattern = "sshr <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]

class vushr(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name
    """Neon `ushr` (unsigned shift right by immediate).

    Reads vector register `Va` and writes vector register `Vd`.
    """
    pattern = "ushr <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]

class vshl(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name
    """Neon `shl` (shift left by immediate).

    Reads vector register `Va` and writes vector register `Vd`.
    """
    pattern = "shl <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]
Expand Down Expand Up @@ -2604,11 +2625,6 @@ def make(cls, src, force=False):
raise Instruction.ParsingException("Instruction ignored")
return AArch64Instruction.build(cls, src)

class vushr(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name
    """Neon `ushr` (unsigned shift right by immediate).

    Reads vector register `Va` and writes vector register `Vd`. This copy
    derives directly from `AArch64Instruction`.
    """
    pattern = "ushr <Vd>.<dt0>, <Va>.<dt1>, <imm>"
    inputs = ["Va"]
    outputs = ["Vd"]

class Transpose(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name
    """Grouping base class with no members of its own.

    Target models use it as a single key for a family of instructions.
    NOTE(review): presumably the base for the transpose instructions
    (e.g. trn1/trn2) -- the subclasses are not visible here; confirm.
    """
    pass

Expand Down Expand Up @@ -3222,7 +3238,7 @@ def eor3_fusion_cb():
"""
Example for a fusion call back. Allows to merge two eor instruction with
two inputs into one eor with three inputs. Such technique can help perform
transformations in case of differences between uArchs.
transformations in case of differences between uArchs.
Note: This is not used in any real (crypto) example. This is merely a PoC.
"""
def core(inst,t,log=None):
Expand Down Expand Up @@ -3285,7 +3301,7 @@ def eor3_splitting_cb():
"""
Example for a splitting call back. Allows to split one eor instruction with
three inputs into two eors with two inputs. Such technique can help perform
transformations in case of differences between uArchs.
transformations in case of differences between uArchs.
Note: This is not used in any real (crypto) example. This is merely a PoC.
"""
def core(inst,t,log=None):
Expand Down
22 changes: 14 additions & 8 deletions slothy/targets/aarch64/apple_m1_firestorm_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,11 @@ def get_min_max_objective(slothy):
vqrdmulh, vqrdmulh_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vushr, vusra, vshl,
vand, vbic, ASimdCompare): ExecutionUnit.V(),
vsrshr, vusra,
vand, vbic, ASimdCompare,
VShiftImmediateBasic,
VShiftImmediateRounding
): ExecutionUnit.V(),
(vadd, vsub,
trn1, trn2): ExecutionUnit.V(),
Vins: ExecutionUnit.V(), # guessed
Expand Down Expand Up @@ -183,8 +186,10 @@ def get_min_max_objective(slothy):
vmls, vmls_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vushr, vusra, vshl,
vand, vbic, ASimdCompare): 1,
vusra,
vand, vbic, ASimdCompare,
VShiftImmediateRounding,
VShiftImmediateBasic): 1,
(vadd, vsub,
trn1, trn2): 1,

Expand Down Expand Up @@ -237,9 +242,10 @@ def get_min_max_objective(slothy):
vmla, vmla_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vusra): 3,
(vshl, vushr,
vand, vbic, ASimdCompare): 2,
vusra): 3,
VShiftImmediateRounding: 3,
(vand, vbic, ASimdCompare,
VShiftImmediateBasic): 2,
(vadd, vsub,
trn1, trn2): 2,
Vins: 2, # or something less than 13
Expand Down Expand Up @@ -293,7 +299,7 @@ def get_latency(src, out_idx, dst):
if instclass_src == umaddl_wform and instclass_dst == umaddl_wform and \
src.args_out[0] == dst.args_in[2]:
return (3, lambda t_src, t_dst: t_dst.program_start_var == t_src.program_start_var + 1)

return latency


Expand Down
15 changes: 9 additions & 6 deletions slothy/targets/aarch64/apple_m1_icestorm_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,9 @@ def get_min_max_objective(slothy):
vqrdmulh, vqrdmulh_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vushr, vusra, vshl,
vand, vbic, ASimdCompare): ExecutionUnit.V(),
vusra, vand, vbic, ASimdCompare,
VShiftImmediateBasic,
VShiftImmediateRounding): ExecutionUnit.V(),

(vadd, vsub,
trn1, trn2): ExecutionUnit.V(),
Expand Down Expand Up @@ -153,8 +154,9 @@ def get_min_max_objective(slothy):
vmls, vmls_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vushr, vusra, vshl,
vand, vbic, ASimdCompare): 1,
vusra, vand, vbic, ASimdCompare,
VShiftImmediateBasic,
VShiftImmediateRounding): 1,
(vadd, vsub,
trn1, trn2): 1,

Expand Down Expand Up @@ -207,8 +209,9 @@ def get_min_max_objective(slothy):
vmla, vmla_lane,
vqdmulh_lane,
vmull, vmlal,
vsrshr, vusra): 3,
(vshl, vushr,
vusra): 3,
VShiftImmediateRounding: 3,
(VShiftImmediateBasic,
vand, vbic, ASimdCompare): 2,
(vadd, vsub,
trn1, trn2): 2,
Expand Down
15 changes: 9 additions & 6 deletions slothy/targets/aarch64/cortex_a55.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,13 @@ def get_min_max_objective(slothy):
vmla, vmla_lane,
vqrdmulh, vqrdmulh_lane,
vqdmulh_lane,
vsrshr, vand, vbic,
vand, vbic,
Ldr_Q,
Str_Q,
q_ldr1_stack, Q_Ld2_Lane_Post_Inc,
Vmull, Vmlal, vushr, vusra
Vmull, Vmlal, vusra,
vushr, vsshr,
VShiftImmediateRounding,
): [[ExecutionUnit.VEC0, ExecutionUnit.VEC1]], # these instructions use both VEC0 and VEC1

St4 : [[ExecutionUnit.VEC0, ExecutionUnit.VEC1, ExecutionUnit.SCALAR_LOAD,
Expand Down Expand Up @@ -176,7 +178,7 @@ def get_min_max_objective(slothy):
( vadd, vsub, vmov,
vmul, vmul_lane, vmls, vmls_lane,
vqrdmulh, vqrdmulh_lane, vqdmulh_lane, Vmull, Vmlal,
vsrshr, umov_d ) : 1,
umov_d ) : 1,
(trn2, trn1, ASimdCompare): 1,
( Ldr_Q ) : 2,
( Str_Q ) : 1,
Expand All @@ -199,7 +201,8 @@ def get_min_max_objective(slothy):
adcs_zero_r_to_zero, cmn) : 1,
(cmp_xzr2, sub, subs_wform, asr_wform, sbcs_zero_to_zero, ngc_zero) : 1,
(bfi) : 1,
(vshl, vshl, vushr) : 1,
VShiftImmediateRounding : 1,
VShiftImmediateBasic : 1,
(vusra) : 1,
(vand, vbic) : 1,
(vuzp1, vuzp2) : 1,
Expand All @@ -218,7 +221,6 @@ def get_min_max_objective(slothy):
is_dform_form_of([vadd, vsub]) : 2,

(trn1, trn2, ASimdCompare): 2,
( vsrshr ) : 3,
( vmul, vmul_lane, vmls, vmls_lane,
vqrdmulh, vqrdmulh_lane, vqdmulh_lane, Vmull, Vmlal) : 4,
( Ldr_Q, Str_Q ) : 4,
Expand All @@ -244,7 +246,8 @@ def get_min_max_objective(slothy):
sub, subs_wform, asr_wform, sbcs_zero_to_zero, cmp_xzr2,
ngc_zero) : 1,
(bfi) : 2,
(vshl, vushr) : 2,
VShiftImmediateRounding : 3,
VShiftImmediateBasic : 2,
(vusra) : 3,
(vand, vbic) : 1,
(vuzp1, vuzp2) : 2,
Expand Down
10 changes: 7 additions & 3 deletions slothy/targets/aarch64/cortex_a72_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ def get_min_max_objective(slothy):

(add, add_imm, add_lsl, add_lsr) : ExecutionUnit.SCALAR(),

vsrshr : [ExecutionUnit.ASIMD1],
(VShiftImmediateRounding,
VShiftImmediateBasic): [ExecutionUnit.ASIMD1],

(St4, St2) : [ExecutionUnit.ASIMD0, ExecutionUnit.ASIMD1],

Expand Down Expand Up @@ -164,7 +165,8 @@ def get_min_max_objective(slothy):
Ldr_X, Str_X )
: 1,

vsrshr : 1,
(VShiftImmediateRounding,
VShiftImmediateBasic): 1,

St2 : 4,
St4 : 8,
Expand Down Expand Up @@ -195,7 +197,9 @@ def get_min_max_objective(slothy):

(add, add_imm, add_lsl, add_lsr) : 2,

vsrshr : 3, # approx
VShiftImmediateRounding: 3, # approx
VShiftImmediateBasic: 3,

St2 : 4,
St4 : 8,
Ld4 : 4
Expand Down

0 comments on commit cbd97a1

Please sign in to comment.