diff --git a/slothy/core/slothy.py b/slothy/core/slothy.py index 858d0c95..a7174955 100644 --- a/slothy/core/slothy.py +++ b/slothy/core/slothy.py @@ -450,10 +450,7 @@ def fusion_loop(self, loop_lbl, forced_loop_type=None, **kwargs): pre , body, post, _, other_data, loop = \ self.arch.Loop.extract(self.source, loop_lbl, forced_loop_type=forced_loop_type) - try: - loop_cnt = other_data['cnt'] - except KeyError: - loop_cnt = None + loop_cnt = other_data['cnt'] indentation = AsmHelper.find_indentation(body) body_ssa = SourceLine.read_multiline(loop.start(loop_cnt)) + \ diff --git a/slothy/helper.py b/slothy/helper.py index 5392edb9..44dedaed 100644 --- a/slothy/helper.py +++ b/slothy/helper.py @@ -1316,7 +1316,7 @@ def run_code(code, txt=None): # If we expect a function return, put a valid address in the LR # that serves as the marker to terminate emulation mu.reg_write(config.arch.RegisterType.unicorn_link_register(), CODE_END) - # Setup stack and allocate allocate initial stack memory + # Setup stack and allocate initial stack memory mu.reg_write(config.arch.RegisterType.unicorn_stack_pointer(), STACK_TOP - config.selftest_default_memory_size) # Copy code into emulator mu.mem_map(CODE_BASE, CODE_SZ) @@ -1336,11 +1336,11 @@ def run_code(code, txt=None): mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode)) else: mu.emu_start(CODE_BASE + offset, CODE_END) - except: + except UcError as e: log.error("Failed to emulate code using unicorn engine") log.error("Code") log.error(SourceLine.write_multiline(code)) - raise SelfTestException("Selftest failed: Unicorn failed to emulate code") + raise SelfTestException(f"Selftest failed: Unicorn failed to emulate code: {str(e)}") from e final_register_contents = {} for r in regs: @@ -1352,6 +1352,15 @@ def run_code(code, txt=None): return final_register_contents, final_memory_contents + def failure_dump(): + log.error("Selftest failed") + log.error("Input code:") + log.error(SourceLine.write_multiline(codeA)) + log.error("Output code:") + log.error(SourceLine.write_multiline(codeB)) + log.error("Output registers:") + log.error(output_registers) + for _ in range(iterations): initial_memory = os.urandom(RAM_SZ) initial_stack = os.urandom(STACK_SZ) @@ -1372,6 +1381,7 @@ def run_code(code, txt=None): # Check if memory contents are the same if final_mem_old != final_mem_new: + failure_dump() raise SelfTestException(f"Selftest failed: Memory mismatch") # Check that callee-saved registers are the same @@ -1380,6 +1390,7 @@ def run_code(code, txt=None): if r.startswith("hint_"): continue if final_regs_old[r] != final_regs_new[r]: + failure_dump() raise SelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}") if fnsym is None: diff --git a/slothy/targets/aarch64/aarch64_big_experimental.py b/slothy/targets/aarch64/aarch64_big_experimental.py index 8f491c41..c53b9667 100644 --- a/slothy/targets/aarch64/aarch64_big_experimental.py +++ b/slothy/targets/aarch64/aarch64_big_experimental.py @@ -100,7 +100,8 @@ def get_min_max_objective(slothy): (vand, vadd) : ExecutionUnit.V(), (vxtn) : ExecutionUnit.V(), veor3 : ExecutionUnit.V(), - (vshl, vshl_d, vshli, vshrn) : ExecutionUnit.V1(), + (VShiftImmediateBasic, + vshl_d, vshli, vshrn) : ExecutionUnit.V1(), # TODO: Should be V13? vusra : ExecutionUnit.V1(), AESInstruction : ExecutionUnit.V(), Transpose : ExecutionUnit.V(), @@ -141,7 +142,8 @@ def get_min_max_objective(slothy): AArch64NeonLogical : 1, (vmovi) : 1, (vxtn) : 1, - (vshl, vshl_d, vshli, vshrn) : 1, + (VShiftImmediateBasic, + vshl_d, vshli, vshrn) : 1, (vmul) : 2, vusra : 1, (vmlal, vmull) : 1, @@ -180,7 +182,8 @@ def get_min_max_objective(slothy): (vmul) : 5, vusra : 4, # TODO: Add fwd path (vmlal, vmull) : 4, # TODO: Add fwd path - (vshl, vshl_d, vshli, vshrn) : 2, + (VShiftImmediateBasic, + vshl_d, vshli, vshrn) : 2, (AArch64BasicArithmetic, AArch64ConditionalSelect, AArch64ConditionalCompare, diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index b6db8e4b..69bca5ed 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -2531,12 +2531,33 @@ class vsmlal2(Vmlal): # pylint: disable=missing-docstring,invalid-name inputs = ["Va", "Vb"] in_outs=["Vd"] -class vsrshr(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name +class VShiftImmediateBasic(AArch64Instruction): + pass + +class VShiftImmediateRounding(AArch64Instruction): + pass + +class vsrshr(VShiftImmediateRounding): # pylint: disable=missing-docstring,invalid-name pattern = "srshr ., ., " inputs = ["Va"] outputs = ["Vd"] -class vshl(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name +class vurshr(VShiftImmediateRounding): # pylint: disable=missing-docstring,invalid-name + pattern = "urshr ., ., " + inputs = ["Va"] + outputs = ["Vd"] + +class vsshr(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name + pattern = "sshr ., ., " + inputs = ["Va"] + outputs = ["Vd"] + +class vushr(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name + pattern = "ushr ., ., " + inputs = ["Va"] + outputs = ["Vd"] + +class vshl(VShiftImmediateBasic): # pylint: disable=missing-docstring,invalid-name pattern = "shl ., ., " inputs = ["Va"] outputs = ["Vd"] @@ -2607,11 +2628,6 @@ def make(cls, src, force=False): raise Instruction.ParsingException("Instruction ignored") return AArch64Instruction.build(cls, src) -class vushr(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name - pattern = "ushr ., ., " - inputs = ["Va"] - outputs = ["Vd"] - class Transpose(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name pass @@ -3222,7 +3238,7 @@ def eor3_fusion_cb(): """ Example for a fusion call back. Allows to merge two eor instruction with two inputs into one eor with three inputs. Such technique can help perform - transformations in case of differences between uArchs. + transformations in case of differences between uArchs. Note: This is not used in any real (crypto) example. This is merely a PoC. """ def core(inst,t,log=None): @@ -3285,7 +3301,7 @@ def eor3_splitting_cb(): """ Example for a splitting call back. Allows to split one eor instruction with three inputs into two eors with two inputs. Such technique can help perform - transformations in case of differences between uArchs. + transformations in case of differences between uArchs. Note: This is not used in any real (crypto) example. This is merely a PoC. """ def core(inst,t,log=None): diff --git a/slothy/targets/aarch64/apple_m1_firestorm_experimental.py b/slothy/targets/aarch64/apple_m1_firestorm_experimental.py index 5c327d32..350a7069 100644 --- a/slothy/targets/aarch64/apple_m1_firestorm_experimental.py +++ b/slothy/targets/aarch64/apple_m1_firestorm_experimental.py @@ -119,8 +119,11 @@ def get_min_max_objective(slothy): vqrdmulh, vqrdmulh_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vushr, vusra, vshl, - vand, vbic, ASimdCompare): ExecutionUnit.V(), + vsrshr, vusra, + vand, vbic, ASimdCompare, + VShiftImmediateBasic, + VShiftImmediateRounding + ): ExecutionUnit.V(), (vadd, vsub, trn1, trn2): ExecutionUnit.V(), Vins: ExecutionUnit.V(), # guessed @@ -183,8 +186,10 @@ def get_min_max_objective(slothy): vmls, vmls_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vushr, vusra, vshl, - vand, vbic, ASimdCompare): 1, + vusra, + vand, vbic, ASimdCompare, + VShiftImmediateRounding, + VShiftImmediateBasic): 1, (vadd, vsub, trn1, trn2): 1, @@ -237,9 +242,10 @@ def get_min_max_objective(slothy): vmla, vmla_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vusra): 3, - (vshl, vushr, - vand, vbic, ASimdCompare): 2, + vusra): 3, + VShiftImmediateRounding: 3, + (vand, vbic, ASimdCompare, + VShiftImmediateBasic): 2, (vadd, vsub, trn1, trn2): 2, Vins: 2, # or something less than 13 @@ -293,7 +299,7 @@ def get_latency(src, out_idx, dst): if instclass_src == umaddl_wform and instclass_dst == umaddl_wform and \ src.args_out[0] == dst.args_in[2]: return (3, lambda t_src, t_dst: t_dst.program_start_var == t_src.program_start_var + 1) - + return latency diff --git a/slothy/targets/aarch64/apple_m1_icestorm_experimental.py b/slothy/targets/aarch64/apple_m1_icestorm_experimental.py index dd2fa4d5..879e674c 100644 --- a/slothy/targets/aarch64/apple_m1_icestorm_experimental.py +++ b/slothy/targets/aarch64/apple_m1_icestorm_experimental.py @@ -97,8 +97,9 @@ def get_min_max_objective(slothy): vqrdmulh, vqrdmulh_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vushr, vusra, vshl, - vand, vbic, ASimdCompare): ExecutionUnit.V(), + vusra, vand, vbic, ASimdCompare, + VShiftImmediateBasic, + VShiftImmediateRounding): ExecutionUnit.V(), (vadd, vsub, trn1, trn2): ExecutionUnit.V(), @@ -153,8 +154,9 @@ def get_min_max_objective(slothy): vmls, vmls_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vushr, vusra, vshl, - vand, vbic, ASimdCompare): 1, + vusra, vand, vbic, ASimdCompare, + VShiftImmediateBasic, + VShiftImmediateRounding): 1, (vadd, vsub, trn1, trn2): 1, @@ -207,8 +209,9 @@ def get_min_max_objective(slothy): vmla, vmla_lane, vqdmulh_lane, vmull, vmlal, - vsrshr, vusra): 3, - (vshl, vushr, + vusra): 3, + VShiftImmediateRounding: 3, + (VShiftImmediateBasic, vand, vbic, ASimdCompare): 2, (vadd, vsub, trn1, trn2): 2, diff --git a/slothy/targets/aarch64/cortex_a55.py b/slothy/targets/aarch64/cortex_a55.py index 37dac2ee..7cdadb13 100644 --- a/slothy/targets/aarch64/cortex_a55.py +++ b/slothy/targets/aarch64/cortex_a55.py @@ -109,11 +109,13 @@ def get_min_max_objective(slothy): vmla, vmla_lane, vqrdmulh, vqrdmulh_lane, vqdmulh_lane, - vsrshr, vand, vbic, + vand, vbic, Ldr_Q, Str_Q, q_ldr1_stack, Q_Ld2_Lane_Post_Inc, - Vmull, Vmlal, vushr, vusra + Vmull, Vmlal, vusra, + vushr, vsshr, + VShiftImmediateRounding, ): [[ExecutionUnit.VEC0, ExecutionUnit.VEC1]], # these instructions use both VEC0 and VEC1 St4 : [[ExecutionUnit.VEC0, ExecutionUnit.VEC1, ExecutionUnit.SCALAR_LOAD, @@ -176,7 +178,7 @@ def get_min_max_objective(slothy): ( vadd, vsub, vmov, vmul, vmul_lane, vmls, vmls_lane, vqrdmulh, vqrdmulh_lane, vqdmulh_lane, Vmull, Vmlal, - vsrshr, umov_d ) : 1, + umov_d ) : 1, (trn2, trn1, ASimdCompare): 1, ( Ldr_Q ) : 2, ( Str_Q ) : 1, @@ -199,7 +201,8 @@ def get_min_max_objective(slothy): adcs_zero_r_to_zero, cmn) : 1, (cmp_xzr2, sub, subs_wform, asr_wform, sbcs_zero_to_zero, ngc_zero) : 1, (bfi) : 1, - (vshl, vshl, vushr) : 1, + VShiftImmediateRounding : 1, + VShiftImmediateBasic : 1, (vusra) : 1, (vand, vbic) : 1, (vuzp1, vuzp2) : 1, @@ -218,7 +221,6 @@ def get_min_max_objective(slothy): is_dform_form_of([vadd, vsub]) : 2, (trn1, trn2, ASimdCompare): 2, - ( vsrshr ) : 3, ( vmul, vmul_lane, vmls, vmls_lane, vqrdmulh, vqrdmulh_lane, vqdmulh_lane, Vmull, Vmlal) : 4, ( Ldr_Q, Str_Q ) : 4, @@ -244,7 +246,8 @@ def get_min_max_objective(slothy): sub, subs_wform, asr_wform, sbcs_zero_to_zero, cmp_xzr2, ngc_zero) : 1, (bfi) : 2, - (vshl, vushr) : 2, + VShiftImmediateRounding : 3, + VShiftImmediateBasic : 2, (vusra) : 3, (vand, vbic) : 1, (vuzp1, vuzp2) : 2, diff --git a/slothy/targets/aarch64/cortex_a72_frontend.py b/slothy/targets/aarch64/cortex_a72_frontend.py index 1a74a7dc..c2bd0554 100644 --- a/slothy/targets/aarch64/cortex_a72_frontend.py +++ b/slothy/targets/aarch64/cortex_a72_frontend.py @@ -131,7 +131,8 @@ def get_min_max_objective(slothy): (add, add_imm, add_lsl, add_lsr) : ExecutionUnit.SCALAR(), - vsrshr : [ExecutionUnit.ASIMD1], + (VShiftImmediateRounding, + VShiftImmediateBasic): [ExecutionUnit.ASIMD1], (St4, St2) : [ExecutionUnit.ASIMD0, ExecutionUnit.ASIMD1], @@ -164,7 +165,8 @@ def get_min_max_objective(slothy): Ldr_X, Str_X ) : 1, - vsrshr : 1, + (VShiftImmediateRounding, + VShiftImmediateBasic): 1, St2 : 4, St4 : 8, @@ -195,7 +197,9 @@ def get_min_max_objective(slothy): (add, add_imm, add_lsl, add_lsr) : 2, - vsrshr : 3, # approx + VShiftImmediateRounding: 3, # approx + VShiftImmediateBasic: 3, + St2 : 4, St4 : 8, Ld4 : 4 diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 06655959..ffe3ae4c 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -2158,4 +2158,4 @@ def match(x): return v if default is None: raise UnknownInstruction(f"Couldn't find {instclass} for {inst}") - return default \ No newline at end of file + return default