Skip to content

Commit

Permalink
Armv7E-M: Add smlatb and ldrb_with_postinc
Browse files Browse the repository at this point in the history
  • Loading branch information
mkannwischer committed Dec 5, 2024
1 parent 8df6fb5 commit 2720ed3
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
22 changes: 22 additions & 0 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ def is_load(self):
ldr_with_imm_stack,
ldr_with_postinc,
ldrh_with_postinc,
ldrb_with_postinc,
ldrd_imm,
ldrd_with_postinc,
ldr_with_inc_writeback,
Expand Down Expand Up @@ -1083,6 +1084,12 @@ class smlatt(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-
inputs = ["Ra","Rb", "Rc"]
outputs = ["Rd"]

class smlatb(Armv7mMultiplication):  # pylint: disable=missing-docstring,invalid-name
    """Instruction model for ``smlatb Rd, Ra, Rb, Rc``.

    Declares one output register (``Rd``) and three input registers
    (``Ra``, ``Rb``, ``Rc``), mirroring the operand order in ``pattern``.

    NOTE(review): per the Arm SMLA<x><y> family this is a signed 16x16
    multiply-accumulate (top halfword of the first operand times bottom
    halfword of the second), with ``Rc`` presumably the accumulator --
    confirm against the architecture manual.
    """

    # Assembly syntax template; the <...> placeholders are filled by the parser.
    pattern = "smlatb<width> <Rd>, <Ra>, <Rb>, <Rc>"

    # Register operands, in the order they appear after <Rd> in the pattern.
    inputs = [
        "Ra",
        "Rb",
        "Rc",
    ]
    outputs = [
        "Rd",
    ]


class smull(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name
pattern = "smull<width> <Ra>, <Rb>, <Rc>, <Rd>"
inputs = ["Rc","Rd"]
Expand Down Expand Up @@ -1355,6 +1362,21 @@ def make(cls, src):
obj.addr = obj.args_in_out[0]
return obj


class ldrb_with_postinc(Armv7mLoadInstruction):  # pylint: disable=missing-docstring,invalid-name
    """Instruction model for ``ldrb Rd, [Ra], imm``.

    ``Ra`` is declared as an in/out operand and ``Rd`` as the only output.
    """

    # Assembly syntax template; the <...> placeholders are filled by the parser.
    pattern = "ldrb<width> <Rd>, [<Ra>], <imm>"
    in_outs = ["Ra"]
    outputs = ["Rd"]

    @classmethod
    def make(cls, src):
        """Build an instance from ``src`` and fill in its addressing fields.

        The instance is created through ``Armv7mLoadInstruction.build`` and
        then annotated with the increment, the address register and the
        register-distinctness constraint before being returned.
        """
        load = Armv7mLoadInstruction.build(cls, src)

        # The parsed immediate is recorded as the increment.
        load.increment = load.immediate

        # Can't have Rd==Ra
        # (presumably pairs in/out operand 0 with output operand 0 -- confirm)
        load.args_inout_out_different = [(0, 0)]

        load.pre_index = None

        # The address comes from the single in/out operand (Ra).
        load.addr = load.args_in_out[0]
        return load


class Ldrd(Armv7mLoadInstruction):
    """Empty subclass of ``Armv7mLoadInstruction``; adds no behavior itself.

    NOTE(review): presumably a common base for the ``ldrd`` instruction
    variants so they can be referenced as a group -- confirm against the
    subclasses.
    """

Expand Down
12 changes: 8 additions & 4 deletions slothy/targets/arm_v7m/cortex_m7.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def get_min_max_objective(slothy):
ldrb_with_imm,
ldrh_with_imm,
ldrh_with_postinc,
ldrb_with_postinc,
vldr_with_imm, vldr_with_postinc # TODO: also FPU?
): ExecutionUnit.LOAD(),
(
Expand Down Expand Up @@ -152,7 +153,7 @@ def get_min_max_objective(slothy):
): ExecutionUnit.ALU(),
(ror, ror_short, rors_short, lsl, asr, asrs): [[ExecutionUnit.ALU0], [ExecutionUnit.ALU1]],
(mul, mul_short, smull, smlal, mla, mls, smulwb, smulwt, smultb, smultt,
smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr): [ExecutionUnit.MAC],
smulbb, smlabt, smlabb, smlatt, smlatb, smlad, smladx, smuad, smuadx, smmulr): [ExecutionUnit.MAC],
(vmov_gpr, vmov_gpr2, vmov_gpr2_dual): [ExecutionUnit.FPU],
(uadd16, sadd16, usub16, ssub16): list(map(list, product(ExecutionUnit.ALU(), [ExecutionUnit.SIMD]))),
(pkhbt, pkhtb, pkhbt_shifted, ubfx_imm): [[ExecutionUnit.ALU0, ExecutionUnit.SIMD]],
Expand All @@ -170,6 +171,7 @@ def get_min_max_objective(slothy):
ldrb_with_imm,
ldrh_with_imm,
ldrh_with_postinc,
ldrb_with_postinc,
vldr_with_imm, vldr_with_postinc, # TODO: double-check
# actually not, just placeholder
ldm_interval, ldm_interval_inc_writeback, vldm_interval_inc_writeback,
Expand All @@ -188,7 +190,7 @@ def get_min_max_objective(slothy):
mul, mul_short,
smull,
smlal,
mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr,
mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlatb, smlad, smladx, smuad, smuadx, smmulr,
neg_short,
log_and, log_and_shifted,
log_or, log_or_shifted,
Expand Down Expand Up @@ -251,7 +253,7 @@ def get_min_max_objective(slothy):
mul, mul_short,
smull,
smlal,
mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr,
mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlatb, smlad, smladx, smuad, smuadx, smmulr,
# TODO: Verify load latency
stm_interval_inc_writeback, # actually not, just placeholder
ldr,
Expand All @@ -262,6 +264,8 @@ def get_min_max_objective(slothy):
ldrb_with_imm,
ldrh_with_imm,
ldrh_with_postinc,
ldrb_with_postinc,
ldrb_with_postinc,
eor_shifted
): 2,
(Ldrd): 3,
Expand All @@ -279,7 +283,7 @@ def get_latency(src, out_idx, dst):
latency = lookup_multidict(default_latencies, src)

# Forwarding path to MAC instructions
if instclass_dst in [mla, mls, smlabb, smlabt, smlatt] and src.args_out[0] == dst.args_in[2]:
if instclass_dst in [mla, mls, smlabb, smlabt, smlatt, smlatb] and src.args_out[0] == dst.args_in[2]:
latency = latency - 1

if instclass_dst in [smlal] and \
Expand Down

0 comments on commit 2720ed3

Please sign in to comment.