diff --git a/example.py b/example.py index a2ddb793..49925f29 100644 --- a/example.py +++ b/example.py @@ -704,6 +704,9 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): def core(self,slothy): slothy.config.variable_size=True slothy.optimize_loop("start", forced_loop_type=Arch_Armv7M.SubsLoop) + slothy.config.sw_pipelining.enabled = True + slothy.config.outputs = ["r0", "r1", "r2", "r5", "flags"] + slothy.optimize_loop("start2", forced_loop_type=Arch_Armv7M.BranchLoop) class Armv7mLoopCmp(Example): def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): diff --git a/examples/naive/armv7m/loop_subs.s b/examples/naive/armv7m/loop_subs.s index 5dd05c9c..938198cc 100644 --- a/examples/naive/armv7m/loop_subs.s +++ b/examples/naive/armv7m/loop_subs.s @@ -1,4 +1,12 @@ movw r5, #16 start: subs.w r5, #1 - bne.w start \ No newline at end of file + bne.w start + +movw r5, #16 +start2: + eor.w r0, r0, r7 + mul r1, r0, r8 + eor.w r0, r1, r4 + subs.w r5, r5, #1 + bne.w start2 \ No newline at end of file diff --git a/examples/opt/armv7m/loop_subs_opt_m7.s b/examples/opt/armv7m/loop_subs_opt_m7.s index f1bcc451..6eb4f593 100644 --- a/examples/opt/armv7m/loop_subs_opt_m7.s +++ b/examples/opt/armv7m/loop_subs_opt_m7.s @@ -1,5 +1,16 @@ movw r5, #16 start: + // Instructions: 0 + // Expected cycles: 0 + // Expected IPC: 0.00 + // + // Wall time: 0.02s + // User time: 0.02s + // + subs r5, #1 + bne start + +movw r5, #16 // Instructions: 0 // Expected cycles: 0 // Expected IPC: 0.00 @@ -7,5 +18,38 @@ start: // Wall time: 0.00s // User time: 0.00s // - subs r5, #1 - bne start \ No newline at end of file +start2: + // Instructions: 4 + // Expected cycles: 4 + // Expected IPC: 1.00 + // + // Cycle bound: 2.0 + // IPC bound: 2.00 + // + // Wall time: 0.03s + // User time: 0.03s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + subs.w r5, r5, #1 // *............................. + eor.w r0, r0, r7 // *............................. + mul r1, r0, r8 // .*............................ + eor.w r0, r1, r4 // ...*.......................... + + // ------ cycle (expected) ------> + // 0 25 + // |------------------------|----- + // eor.w r0, r0, r7 // *...~...~...~...~...~...~...~.. + // mul r1, r0, r8 // .*..'~..'~..'~..'~..'~..'~..'~. + // eor.w r0, r1, r4 // ...*'..~'..~'..~'..~'..~'..~'.. + // subs.w r5, r5, #1 // *...~...~...~...~...~...~...~.. + + bne start2 + // Instructions: 0 + // Expected cycles: 0 + // Expected IPC: 0.00 + // + // Wall time: 0.00s + // User time: 0.00s + // \ No newline at end of file diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 56e1f4ed..9e20c2fc 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -338,6 +338,10 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, loop_cnt_reg = inst[0].args_in_out[0] loop_end_reg = inst[0].args_in_out[0] break + elif isinstance(inst[0], subs_imm): + loop_cnt_reg = inst[0].args_out[0] + loop_end_reg = inst[0].args_out[0] + break # Find FPR that is used to stash the loop end incase it's vmov loop loop_end_reg_fpr = None @@ -1215,6 +1219,12 @@ class sub_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring, pattern = "sub , " in_outs = ["Ra"] +class subs_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "subs , , " + inputs = ["Ra"] + outputs = ["Rd"] + modifiesFlags = True + class subs_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name pattern = "subs , " in_outs = ["Ra"] diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py index 57cc8f11..a819941f 100644 --- a/slothy/targets/arm_v7m/cortex_m7.py +++ b/slothy/targets/arm_v7m/cortex_m7.py @@ -143,7 +143,7 @@ def get_min_max_objective(slothy): add_short, add_imm, add_imm_short, - sub, subs_imm_short, sub_imm_short, + sub, subs_imm, subs_imm_short, sub_imm_short, neg_short, log_and, log_or, @@ -185,6 +185,7 @@ def get_min_max_objective(slothy): add_shifted, sub_shifted, sub_imm_short, + subs_imm, subs_imm_short, uadd16, sadd16, usub16, ssub16, mul, mul_short, @@ -228,6 +229,7 @@ def get_min_max_objective(slothy): add_shifted, sub_shifted, sub_imm_short, + subs_imm, subs_imm_short, uadd16, sadd16, usub16, ssub16, neg_short,