From 7c737b69938ed04257bade44696649cbf084c796 Mon Sep 17 00:00:00 2001
From: Amin Abdulrahman
Date: Fri, 29 Nov 2024 12:59:57 +0100
Subject: [PATCH 1/2] Cortex-M7: Fix MAC instructions to one issue slot

---
 slothy/targets/arm_v7m/cortex_m7.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py
index a819941f..7a9892bd 100644
--- a/slothy/targets/arm_v7m/cortex_m7.py
+++ b/slothy/targets/arm_v7m/cortex_m7.py
@@ -56,11 +56,17 @@ def add_further_constraints(slothy):
     add_st_hazard(slothy)
 
     add_dsp_slot_constraint(slothy)
+    add_mac_slot_constraint(slothy)
 
 def add_dsp_slot_constraint(slothy):
     slothy.restrict_slots_for_instructions_by_class(
         [pkhbt, pkhtb, pkhbt_shifted, ubfx_imm, uadd16, usub16, sadd16, ssub16], [0])
 
+def add_mac_slot_constraint(slothy):
+    slothy.restrict_slots_for_instructions_by_class(
+        [mul, mul_short, smull, smlal, mla, mls, smulwb, smulwt, smultb, smultt,
+         smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr], [1])
+
 # TODO: this seems incorrect
 def add_slot_constraints(slothy):
     slothy.restrict_slots_for_instructions_by_class(

From f6d2dc5c7c1400f28d3e69b943151892170fbd5e Mon Sep 17 00:00:00 2001
From: "Matthias J. Kannwischer"
Date: Fri, 20 Dec 2024 16:38:30 +0800
Subject: [PATCH 2/2] M7: Adjust MAC forwarding paths

---
 slothy/targets/arm_v7m/cortex_m7.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py
index 7a9892bd..f56e3cc9 100644
--- a/slothy/targets/arm_v7m/cortex_m7.py
+++ b/slothy/targets/arm_v7m/cortex_m7.py
@@ -291,12 +291,16 @@ def get_latency(src, out_idx, dst):
     latency = lookup_multidict(default_latencies, src)
 
     # Forwarding path to MAC instructions
-    if instclass_dst in [mla, mls, smlabb, smlabt, smlatt, smlatb] and src.args_out[0] == dst.args_in[2]:
+    if instclass_dst in [mla, mls, smlabb, smlabt, smlatt, smlatb] and dst.args_in[2] in (src.args_out + src.args_in_out):
         latency = latency - 1
 
-    if instclass_dst in [smlal] and \
-            (src.args_out[0] == dst.args_in_out[0] or src.args_out[0] == dst.args_in_out[1]):
-        latency = latency - 1
+    if instclass_dst in [smlal]:
+        if len(src.args_out) > 1:
+            if (src.args_out[0] == dst.args_in_out[0] or src.args_out[0] == dst.args_in_out[1]):
+                latency = latency - 1
+        elif len(src.args_in_out) > 1:
+            if (src.args_in_out[0] == dst.args_in_out[0] or src.args_in_out[0] == dst.args_in_out[1]):
+                latency = latency - 1
 
     # Multiply accumulate chain latency is 1
     if instclass_src in [smlal] and instclass_dst in [smlal] and \