Skip to content

Commit

Permalink
Merge pull request #133 from dop-amin/ext-branch-loop
Browse files Browse the repository at this point in the history
Armv7m: Allow subs with immediate and two registers
  • Loading branch information
mkannwischer authored Dec 20, 2024
2 parents 1d9ab09 + faca2b2 commit f58e6b3
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 4 deletions.
3 changes: 3 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,9 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
def core(self,slothy):
# Optimize the two loops of this example in turn.
slothy.config.variable_size=True
# First loop, labelled "start": forced to be treated as a SubsLoop.
slothy.optimize_loop("start", forced_loop_type=Arch_Armv7M.SubsLoop)
# Second loop, labelled "start2": software pipelining is switched on and the
# listed registers plus the flags are declared as outputs before optimizing;
# this loop is forced to be treated as a BranchLoop.
slothy.config.sw_pipelining.enabled = True
slothy.config.outputs = ["r0", "r1", "r2", "r5", "flags"]
slothy.optimize_loop("start2", forced_loop_type=Arch_Armv7M.BranchLoop)

class Armv7mLoopCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
Expand Down
10 changes: 9 additions & 1 deletion examples/naive/armv7m/loop_subs.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
movw r5, #16
start:
subs.w r5, #1
bne.w start
bne.w start

movw r5, #16
start2:
eor.w r0, r0, r7
mul r1, r0, r8
eor.w r0, r1, r4
subs.w r5, r5, #1
bne.w start2
48 changes: 46 additions & 2 deletions examples/opt/armv7m/loop_subs_opt_m7.s
Original file line number Diff line number Diff line change
@@ -1,11 +1,55 @@
movw r5, #16
start:
// Instructions: 0
// Expected cycles: 0
// Expected IPC: 0.00
//
// Wall time: 0.02s
// User time: 0.02s
//
subs r5, #1
bne start

movw r5, #16
// Instructions: 0
// Expected cycles: 0
// Expected IPC: 0.00
//
// Wall time: 0.00s
// User time: 0.00s
//
subs r5, #1
bne start
start2:
// Instructions: 4
// Expected cycles: 4
// Expected IPC: 1.00
//
// Cycle bound: 2.0
// IPC bound: 2.00
//
// Wall time: 0.03s
// User time: 0.03s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
subs.w r5, r5, #1 // *.............................
eor.w r0, r0, r7 // *.............................
mul r1, r0, r8 // .*............................
eor.w r0, r1, r4 // ...*..........................

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// eor.w r0, r0, r7 // *...~...~...~...~...~...~...~..
// mul r1, r0, r8 // .*..'~..'~..'~..'~..'~..'~..'~.
// eor.w r0, r1, r4 // ...*'..~'..~'..~'..~'..~'..~'..
// subs.w r5, r5, #1 // *...~...~...~...~...~...~...~..

bne start2
// Instructions: 0
// Expected cycles: 0
// Expected IPC: 0.00
//
// Wall time: 0.00s
// User time: 0.00s
//
10 changes: 10 additions & 0 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
loop_cnt_reg = inst[0].args_in_out[0]
loop_end_reg = inst[0].args_in_out[0]
break
elif isinstance(inst[0], subs_imm):
loop_cnt_reg = inst[0].args_out[0]
loop_end_reg = inst[0].args_out[0]
break

# Find FPR that is used to stash the loop end in case it's a vmov loop
loop_end_reg_fpr = None
Expand Down Expand Up @@ -1215,6 +1219,12 @@ class sub_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,
pattern = "sub<width> <Ra>, <imm>"
in_outs = ["Ra"]

# Three-operand `subs` with an immediate, e.g. `subs.w r5, r5, #1`.
# Unlike subs_imm_short, whose single register Ra is both read and written,
# this form names the destination (Rd) and the source (Ra) separately.
class subs_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name
pattern = "subs<width> <Rd>, <Ra>, <imm>"
# Ra is declared as an input only, Rd as an output only.
inputs = ["Ra"]
outputs = ["Rd"]
# Declares the instruction as flag-modifying.
# NOTE(review): how this attribute is consumed is defined outside this excerpt.
modifiesFlags = True

class subs_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name
pattern = "subs<width> <Ra>, <imm>"
in_outs = ["Ra"]
Expand Down
4 changes: 3 additions & 1 deletion slothy/targets/arm_v7m/cortex_m7.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def get_min_max_objective(slothy):
add_short,
add_imm,
add_imm_short,
sub, subs_imm_short, sub_imm_short,
sub, subs_imm, subs_imm_short, sub_imm_short,
neg_short,
log_and,
log_or,
Expand Down Expand Up @@ -185,6 +185,7 @@ def get_min_max_objective(slothy):
add_shifted,
sub_shifted,
sub_imm_short,
subs_imm,
subs_imm_short,
uadd16, sadd16, usub16, ssub16,
mul, mul_short,
Expand Down Expand Up @@ -228,6 +229,7 @@ def get_min_max_objective(slothy):
add_shifted,
sub_shifted,
sub_imm_short,
subs_imm,
subs_imm_short,
uadd16, sadd16, usub16, ssub16,
neg_short,
Expand Down

0 comments on commit f58e6b3

Please sign in to comment.