From 0a97c71a4fa63766b58832c8c123588d2af6c9d6 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sat, 19 Oct 2024 19:08:11 +0100 Subject: [PATCH 01/16] Make address offset fixup configurable Address offset fixup is a feature which leverages commutativity relations such as ``` ldr X, [A], #immA; str Y, [A, #immB] == str Y, [A, #(immB+immA)] ldr X, [A], #immA ``` to achieve greater instruction scheduling flexibility in SLOTHY. When one enables this feature, one MUST ensure that registers which are used for addresses are not used in any instructions other than loads and stores. Otherwise, the use of this feature is unsound (one may see ldr/str instructions with increment reordered with instructions depending on the address register). Originally, address offset fixup was on by default and not configurable, which is dangerous, especially as long as SLOTHY does not yet detect violations of the above condition. As a step towards fixing this, this commit makes address offset fixup configurable for the AArch64 architecture model. The Armv8.1-M architecture model is not compatible with the new option: For the time being, address offset fixup must remain on in this case. 
--- slothy/core/config.py | 46 ++++++++++++++++ slothy/core/dataflow.py | 51 +++++++++++++++++ slothy/targets/aarch64/aarch64_neon.py | 76 ++++++++++++++------------ slothy/targets/arm_v81m/arch_v81m.py | 1 + 4 files changed, 140 insertions(+), 34 deletions(-) diff --git a/slothy/core/config.py b/slothy/core/config.py index 1ec12634..77d39669 100644 --- a/slothy/core/config.py +++ b/slothy/core/config.py @@ -148,6 +148,42 @@ def selfcheck_failure_logfile(self): sheer size.""" return self._selfcheck_failure_logfile + @property + def unsafe_address_offset_fixup(self): + """Whether address offset fixup is enabled + + Address offset fixup is a feature which leverages commutativity relations + such as + + ``` + ldr X, [A], #immA; + str Y, [A, #immB] + == + str Y, [A, #(immB+immA)] + ldr X, [A], #immA + ``` + + to achieve greater instruction scheduling flexibility in SLOTHY. + + SAFETY: + When you enable this feature, you MUST ensure that registers which are + used for addresses are not used in any other instruction than load and + stores. OTHERWISE, THE USE OF THIS FEATURE IS UNSOUND (you may see ldr/ + str instructions with increment reordered with instructions depending + on the address register). + + By default, this is enabled for backwards compatibility. + + LIMITATION: For historical reason, this feature cannot be disabled for + the Armv8.1-M architecture model. A refactoring of that model is needed + to make address offset fixup configurable. + + Note: The user-imposed safety constraint is not a necessity -- in principle, + SLOTHY could detect when it is safe to reorder ldr/str instructions with increment. + It just hasn't been implemented yet. + """ + return self._unsafe_address_offset_fixup + @property def allow_useless_instructions(self): """Indicates whether SLOTHY should abort upon encountering unused instructions. 
@@ -1111,6 +1147,11 @@ def __init__(self, Arch, Target): self._selfcheck_failure_logfile = None self._allow_useless_instructions = False + # TODO: This should be False by default, but this is a breaking + # change that requires a lot of examples (where it _is_ safe to + # apply address offset fixup) to be changed. + self._unsafe_address_offset_fixup = True + self._absorb_spills = True self._split_heuristic = False @@ -1228,6 +1269,11 @@ def selfcheck_failure_logfile(self,val): @allow_useless_instructions.setter def allow_useless_instructions(self,val): self._allow_useless_instructions = val + @unsafe_address_offset_fixup.setter + def unsafe_address_offset_fixup(self,val): + if val is False and self.arch.arch_name == "Arm_v81M": + raise InvalidConfig("unsafe address offset fixup must be set for Armv8.1-M") + self._unsafe_address_offset_fixup = val @locked_registers.setter def locked_registers(self,val): self._locked_registers = val diff --git a/slothy/core/dataflow.py b/slothy/core/dataflow.py index 276ad35f..e5f27005 100644 --- a/slothy/core/dataflow.py +++ b/slothy/core/dataflow.py @@ -359,6 +359,7 @@ def _load_slothy_config(self, slothy_config): self._inputs_are_outputs = self._slothy_config.inputs_are_outputs self._allow_useless_instructions = self._slothy_config.allow_useless_instructions self._absorb_spills = self._slothy_config.absorb_spills + self._unsafe_address_offset_fixup = self._slothy_config.unsafe_address_offset_fixup class DataFlowGraphException(Exception): """An exception triggered during parsing a data flow graph""" @@ -566,6 +567,53 @@ def fusion_cb(t): return t.inst.global_fusion_cb(t, log=logger.info) return self.apply_cbs(fusion_cb, logger, one_a_time=True) + def _address_offset_fixup_cbs(self): + logger = self.logger.getChild("address_fixup_cbs") + def address_offset_cb(t, log=None): + # Address offset fixup relaxes scheduling constraints + # for load/store instructions with increment. 
+ if t.inst.is_load_store_instruction() is False: + return False + inc = getattr(t.inst, 'increment', None) + addr = getattr(t.inst, 'addr', None) + if inc is None or addr is None: + return False + + # If the address is already marked as input-only, + # don't do anything. + # + # TODO: This is only to gracefully deal with the case + # of architecture models where address offset fixup is + # still the default and ldr/str instructions with increment + # unconditionally model their address registers as + # input-only. + if addr not in t.inst.args_in_out: + return False + + idx = t.inst.args_in_out.index(addr) + + t.inst.args_in.append(addr) + t.inst.arg_types_in.append(t.inst.arg_types_in_out[idx]) + t.inst.args_in_restrictions.append(t.inst.args_in_out_restrictions[idx]) + # TODO: Architecture-model-specific code does not belong here. + if hasattr(t.inst, 'pattern_inputs'): + t.inst.pattern_inputs.append(t.inst.pattern_in_outs[idx]) + t.inst.num_in += 1 + + del t.inst.args_in_out[idx] + del t.inst.arg_types_in_out[idx] + del t.inst.args_in_out_restrictions[idx] + if hasattr(t.inst, 'pattern_inputs'): + del t.inst.pattern_in_outs[idx] + t.inst.num_in_out -= 1 + + if log is not None: + log.info(f"Relaxed input-output argument {addr} of {t} to input-only") + + # Signal that something changed + return True + return self.apply_cbs(address_offset_cb, logger) + def __init__(self, src, logger, config, parsing_cb=True): """Compute a data flow graph from a source code snippet. 
@@ -589,6 +637,9 @@ def __init__(self, src, logger, config, parsing_cb=True): if parsing_cb is True: self.apply_parsing_cbs() + if config._unsafe_address_offset_fixup is True: + self._address_offset_fixup_cbs() + self._selfcheck_outputs() def _selfcheck_outputs(self): diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index 90aa8dbc..810550e3 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -46,6 +46,7 @@ class which generates instruction parsers and writers from instruction templates from sympy import simplify +arch_name = "Arm_AArch64" llvm_mca_arch = "aarch64" class RegisterType(Enum): @@ -1123,50 +1124,50 @@ def write(self): class q_ldr_with_inc_writeback(Ldr_Q): # pylint: disable=missing-docstring,invalid-name pattern = "ldr , [, ]!" - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Qa"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class q_ldr_with_postinc(Ldr_Q): # pylint: disable=missing-docstring,invalid-name pattern = "ldr , [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Qa"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class q_ld1_with_postinc(Ldr_Q): # pylint: disable=missing-docstring,invalid-name pattern = "ld1 {.
}, [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Va"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class q_ldp_with_postinc(Ldp_Q): # pylint: disable=missing-docstring,invalid-name pattern = "ldp , , [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Qa", "Qb"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class Str_Q(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name @@ -1294,35 +1295,38 @@ def write(self): class q_str_with_inc_writeback(Str_Q): # pylint: disable=missing-docstring,invalid-name pattern = "str , [, ]!" - inputs = ["Qa", "Xc"] + in_outs = ["Xc"] + inputs = ["Qa"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[1] + obj.addr = obj.args_in_out[0] return obj class q_str_with_postinc(Str_Q): # pylint: disable=missing-docstring,invalid-name pattern = "str , [], " - inputs = ["Qa", "Xc"] + in_outs = ["Xc"] + inputs = ["Qa"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[1] + obj.addr = obj.args_in_out[0] return obj class q_stp_with_postinc(Stp_Q): # pylint: disable=missing-docstring,invalid-name pattern = "stp , , [], " - inputs = ["Qa", "Qb", "Xc"] + inputs = ["Qa", "Qb"] + in_outs = ["Xc"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[2] + obj.addr = obj.args_in_out[0] return obj class Ldr_X(AArch64Instruction): # pylint: disable=missing-docstring,invalid-name @@ -1365,14 +1369,14 @@ def write(self): class 
x_ldr_with_postinc(Ldr_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldr , [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Xa"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class x_ldr_stack(Ldr_X): # pylint: disable=missing-docstring,invalid-name @@ -1524,26 +1528,26 @@ def write(self): class x_ldp_with_inc_writeback(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldp , , [, ]!" - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Xa", "Xb"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class x_ldp_with_postinc_writeback(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldp , , [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Xa", "Xb"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class x_ldp_with_inc_hint(Ldp_X): # pylint: disable=missing-docstring,invalid-name @@ -2695,13 +2699,14 @@ def write(self): class x_str_postinc(Str_X): # pylint: disable=missing-docstring,invalid-name pattern = "str , [], " - inputs = ["Xa", "Xc"] + inputs = ["Xa"] + in_outs = ["Xc"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[1] + obj.addr = obj.args_in_out[0] return obj class x_str_sp_imm(Str_X): # pylint: disable=missing-docstring,invalid-name @@ -2849,13 +2854,14 @@ def write(self): class x_stp_with_inc_writeback(Stp_X): # pylint: disable=missing-docstring,invalid-name pattern = "stp , , [, ]!" 
- inputs = ["Xc", "Xa", "Xb"] + inputs = ["Xa", "Xb"] + in_outs = ["Xc"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) obj.increment = obj.immediate obj.pre_index = None - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] return obj class x_stp_with_inc_hint(Stp_X): # pylint: disable=missing-docstring,invalid-name @@ -2940,15 +2946,16 @@ def make(cls, src): class st4_with_inc(St4): # pylint: disable=missing-docstring,invalid-name pattern = "st4 {., ., ., .}, [], " - inputs = ["Xc", "Va", "Vb", "Vc", "Vd"] + inputs = ["Va", "Vb", "Vc", "Vd"] + in_outs = ["Xc"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] obj.increment = obj.immediate obj.pre_index = None obj.args_in_combinations = [ - ( [1,2,3,4], [ [ f"v{i}", f"v{i+1}", f"v{i+2}", f"v{i+3}" ] for i in range(0,28) ] ) + ( [0,1,2,3], [ [ f"v{i}", f"v{i+1}", f"v{i+2}", f"v{i+3}" ] for i in range(0,28) ] ) ] return obj @@ -2970,15 +2977,16 @@ def make(cls, src): class st2_with_inc(St2): # pylint: disable=missing-docstring,invalid-name pattern = "st2 {., .}, [], " - inputs = ["Xc", "Va", "Vb"] + inputs = ["Va", "Vb"] + in_outs = ["Xc"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] obj.increment = obj.immediate obj.pre_index = None obj.args_in_combinations = [ - ( [1,2], [ [ f"v{i}", f"v{i+1}" ] for i in range(0,30) ] ) + ( [0,1], [ [ f"v{i}", f"v{i+1}" ] for i in range(0,30) ] ) ] return obj @@ -3001,12 +3009,12 @@ def make(cls, src): class ld4_with_inc(Ld4): # pylint: disable=missing-docstring,invalid-name pattern = "ld4 {., ., ., .}, [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Va", "Vb", "Vc", "Vd"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] obj.increment = obj.immediate obj.pre_index = None 
obj.args_out_combinations = [ @@ -3033,12 +3041,12 @@ def make(cls, src): class ld2_with_inc(Ld2): # pylint: disable=missing-docstring,invalid-name pattern = "ld2 {., .}, [], " - inputs = ["Xc"] + in_outs = ["Xc"] outputs = ["Va", "Vb"] @classmethod def make(cls, src): obj = AArch64Instruction.build(cls, src) - obj.addr = obj.args_in[0] + obj.addr = obj.args_in_out[0] obj.increment = obj.immediate obj.pre_index = None obj.args_out_combinations = [ diff --git a/slothy/targets/arm_v81m/arch_v81m.py b/slothy/targets/arm_v81m/arch_v81m.py index 778a93ee..204f3f62 100644 --- a/slothy/targets/arm_v81m/arch_v81m.py +++ b/slothy/targets/arm_v81m/arch_v81m.py @@ -40,6 +40,7 @@ from sympy import simplify from enum import Enum +arch_name = "Arm_v81M" llvm_mca_arch = "arm" class RegisterType(Enum): From bbc7e0dae55926548046d10d7590b6eb32c1b094 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sat, 23 Nov 2024 04:17:56 +0000 Subject: [PATCH 02/16] AArch64 model: Fix misleading names of ld/str instructions with hint There are various instructions with the suffix `_with_inc_hint`, suggestive of an address increment. However, none of these instructions changes the address register. This commit changes those instructions to use `_with_imm_hint` instead. No changes to the uArch models are needed because the respective instructions are configured through their parent classes Ldr_X etc. 
--- slothy/targets/aarch64/aarch64_neon.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index 810550e3..b2e8f36f 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -958,7 +958,7 @@ def make(cls, src): obj.addr = obj.args_in[0] return obj -class q_ldr_with_inc_hint(Ldr_Q): # pylint: disable=missing-docstring,invalid-name +class q_ldr_with_imm_hint(Ldr_Q): # pylint: disable=missing-docstring,invalid-name pattern = "ldrh , , , " inputs = ["Xc", "Th"] outputs = ["Qa"] @@ -1187,7 +1187,7 @@ def make(cls, src): obj.addr = obj.args_in[1] return obj -class q_str_with_inc_hint(Str_Q): # pylint: disable=missing-docstring,invalid-name +class q_str_with_imm_hint(Str_Q): # pylint: disable=missing-docstring,invalid-name pattern = "strh , , , " inputs = ["Qa", "Xc"] outputs = ["Th"] @@ -1550,7 +1550,7 @@ def make(cls, src): obj.addr = obj.args_in_out[0] return obj -class x_ldp_with_inc_hint(Ldp_X): # pylint: disable=missing-docstring,invalid-name +class x_ldp_with_imm_hint(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldph , , , , " inputs = ["Xc", "Th"] outputs = ["Xa", "Xb"] @@ -1566,7 +1566,7 @@ def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_ldp_sp_with_inc_hint(Ldp_X): # pylint: disable=missing-docstring,invalid-name +class x_ldp_sp_with_imm_hint(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldph , , sp, , " inputs = ["Th"] outputs = ["Xa", "Xb"] @@ -1582,7 +1582,7 @@ def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_ldp_sp_with_inc_hint2(Ldp_X): # pylint: disable=missing-docstring,invalid-name +class x_ldp_sp_with_imm_hint2(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldphp , , sp, , , " inputs = ["Th0", "Th1"] outputs = ["Xa", "Xb"] @@ -1598,7 +1598,7 @@ 
def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_ldp_with_inc_hint2(Ldp_X): # pylint: disable=missing-docstring,invalid-name +class x_ldp_with_imm_hint2(Ldp_X): # pylint: disable=missing-docstring,invalid-name pattern = "ldphp , , , , , " inputs = ["Xc", "Th0", "Th1"] outputs = ["Xa", "Xb"] @@ -2864,7 +2864,7 @@ def make(cls, src): obj.addr = obj.args_in_out[0] return obj -class x_stp_with_inc_hint(Stp_X): # pylint: disable=missing-docstring,invalid-name +class x_stp_with_imm_hint(Stp_X): # pylint: disable=missing-docstring,invalid-name pattern = "stph , , , , " inputs = ["Xc", "Xa", "Xb"] outputs = ["Th"] @@ -2880,7 +2880,7 @@ def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_stp_sp_with_inc_hint(Stp_X): # pylint: disable=missing-docstring,invalid-name +class x_stp_sp_with_imm_hint(Stp_X): # pylint: disable=missing-docstring,invalid-name pattern = "stph , , sp, , " inputs = ["Xa", "Xb"] outputs = ["Th"] @@ -2896,7 +2896,7 @@ def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_stp_sp_with_inc_hint2(Stp_X): # pylint: disable=missing-docstring,invalid-name +class x_stp_sp_with_imm_hint2(Stp_X): # pylint: disable=missing-docstring,invalid-name pattern = "stphp , , sp, , , " inputs = ["Xa", "Xb"] outputs = ["Th0", "Th1"] @@ -2912,7 +2912,7 @@ def write(self): self.immediate = simplify(self.pre_index) return super().write() -class x_stp_with_inc_hint2(Stp_X): # pylint: disable=missing-docstring,invalid-name +class x_stp_with_imm_hint2(Stp_X): # pylint: disable=missing-docstring,invalid-name pattern = "stphp , , , , , " inputs = ["Xa", "Xb", "Xc"] outputs = ["Th0", "Th1"] From b1a4ab5fb0d8cdfa7cfd6d9d07c8f8e6004afd11 Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Fri, 11 Oct 2024 16:01:21 +0200 Subject: [PATCH 03/16] More general structure for loop parsing --- slothy/core/slothy.py | 10 +-- slothy/targets/aarch64/aarch64_neon.py | 118 
++++++++++++++----------- slothy/targets/arm_v81m/arch_v81m.py | 108 +++++++++++++++------- 3 files changed, 146 insertions(+), 90 deletions(-) diff --git a/slothy/core/slothy.py b/slothy/core/slothy.py index 3fb67a41..bcff4108 100644 --- a/slothy/core/slothy.py +++ b/slothy/core/slothy.py @@ -367,12 +367,11 @@ def fusion_loop(self, loop_lbl): """Run fusion callbacks on loop body""" logger = self.logger.getChild(f"ssa_loop_{loop_lbl}") - pre , body, post, _, other_data = \ + pre , body, post, _, other_data, loop = \ self.arch.Loop.extract(self.source, loop_lbl) - (loop_cnt, _, _) = other_data + loop_cnt = other_data['cnt'] indentation = AsmHelper.find_indentation(body) - loop = self.arch.Loop(lbl_start=loop_lbl) body_ssa = SourceLine.read_multiline(loop.start(loop_cnt)) + \ SourceLine.apply_indentation(self._fusion_core(pre, body, logger), indentation) + \ SourceLine.read_multiline(loop.end(other_data)) @@ -398,9 +397,9 @@ def optimize_loop(self, loop_lbl, postamble_label=None): logger = self.logger.getChild(loop_lbl) - early, body, late, _, other_data = \ + early, body, late, _, other_data, loop = \ self.arch.Loop.extract(self.source, loop_lbl) - (loop_cnt, _, _) = other_data + loop_cnt = other_data['cnt'] # Check if the body has a dominant indentation indentation = AsmHelper.find_indentation(body) @@ -464,7 +463,6 @@ def loop_lbl_iter(i): for i in range(1, num_exceptional): optimized_code += indented(self.arch.Branch.if_equal(loop_cnt, i, loop_lbl_iter(i))) - loop = self.arch.Loop(lbl_start=loop_lbl) optimized_code += indented(preamble_code) if self.config.sw_pipelining.unknown_iteration_count: diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index b2e8f36f..fad73cc0 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -43,6 +43,7 @@ class which generates instruction parsers and writers from instruction templates import math from enum import Enum from functools import cache 
+from abc import ABC, abstractmethod from sympy import simplify @@ -169,70 +170,46 @@ def unconditional(lbl): """Emit unconditional branch""" yield f"b {lbl}" -class Loop: - """Helper functions for parsing and writing simple loops in AArch64 - - TODO: Generalize; current implementation too specific about shape of loop""" - +class Loop(ABC): def __init__(self, lbl_start="1", lbl_end="2", loop_init="lr"): self.lbl_start = lbl_start self.lbl_end = lbl_end self.loop_init = loop_init + @abstractmethod def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None): """Emit starting instruction(s) and jump label for loop""" - indent = ' ' * indentation - if unroll > 1: - assert unroll in [1,2,4,8,16,32] - yield f"{indent}lsr {loop_cnt}, {loop_cnt}, #{int(math.log2(unroll))}" - if fixup != 0: - yield f"{indent}sub {loop_cnt}, {loop_cnt}, #{fixup}" - if jump_if_empty is not None: - yield f"cbz {loop_cnt}, {jump_if_empty}" - yield f"{self.lbl_start}:" + # TODO: Use different type of fixup for cmp vs. subs loops + pass + @abstractmethod def end(self, other, indentation=0): """Emit compare-and-branch at the end of the loop""" - (reg0, reg1, imm) = other - indent = ' ' * indentation - lbl_start = self.lbl_start - if lbl_start.isdigit(): - lbl_start += "b" - - yield f"{indent}sub {reg0}, {reg1}, {imm}" - yield f"{indent}cbnz {reg0}, {lbl_start}" - - @staticmethod - def extract(source, lbl): - """Locate a loop with start label `lbl` in `source`. - - We currently only support the following loop forms: - - ``` - loop_lbl: - {code} - sub[s] , , #1 - (cbnz|bnz|bne) , loop_lbl - ``` + pass + + def _extract(self, source, lbl): + """Locate a loop with start label `lbl` in `source`.``` """ assert isinstance(source, list) + + additional_data = None pre = [] body = [] post = [] - loop_lbl_regexp_txt = r"^\s*(?P
by list of all possible datatypes + mnemonic = Instruction.unfold_abbrevs(obj.mnemonic) + + expected_args = obj.num_in + obj.num_out + obj.num_in_out + regexp_txt = rf"^\s*{mnemonic}" + if expected_args > 0: + regexp_txt += r"\s+" + regexp_txt += ','.join([r"\s*(\w+)\s*" for _ in range(expected_args)]) + regexp = re.compile(regexp_txt) + + p = regexp.match(src) + if p is None: + raise Instruction.ParsingException( + f"Doesn't match basic instruction template {regexp_txt}") + + operands = list(p.groups()) + + if obj.num_out > 0: + obj.args_out = operands[:obj.num_out] + idx_args_in = obj.num_out + elif obj.num_in_out > 0: + obj.args_in_out = operands[:obj.num_in_out] + idx_args_in = obj.num_in_out + else: + idx_args_in = 0 + + obj.args_in = operands[idx_args_in:] + + if not len(obj.args_in) == obj.num_in: + raise FatalParsingException(f"Something wrong parsing {src}: Expect {obj.num_in} input," + f" but got {len(obj.args_in)} ({obj.args_in})") + + return obj + + @staticmethod + def parser(src_line): + """Global factory method parsing an assembly line into an instance + of a subclass of Instruction.""" + insts = [] + exceptions = {} + instnames = [] + + src = src_line.text.strip() + + # Iterate through all derived classes and call their parser + # until one of them hopefully succeeds + for inst_class in Instruction.all_subclass_leaves: + try: + inst = inst_class.make(src) + instnames = [inst_class.__name__] + insts = [inst] + break + except Instruction.ParsingException as e: + exceptions[inst_class.__name__] = e + + for i in insts: + i.source_line = src_line + i.extract_read_writes() + + if len(insts) == 0: + logging.error("Failed to parse instruction %s", src) + logging.error("A list of attempted parsers and their exceptions follows.") + for i,e in exceptions.items(): + msg = f"* {i + ':':20s} {e}" + logging.error(msg) + raise Instruction.ParsingException( + f"Couldn't parse {src}\nYou may need to add support "\ + "for a new instruction (variant)?") + + 
logging.debug("Parsing result for '%s': %s", src, instnames) + return insts + + def __repr__(self): + return self.write() + +class Armv7mInstruction(Instruction): + """Abstract class representing Armv7m instructions""" + + PARSERS = {} + + @staticmethod + def _unfold_pattern(src): + + src = re.sub(r"\.", "\\\\s*\\\\.\\\\s*", src) + src = re.sub(r"\[", "\\\\s*\\\\[\\\\s*", src) + src = re.sub(r"\]", "\\\\s*\\\\]\\\\s*", src) + + def pattern_transform(g): + return \ + f"([{g.group(1).lower()}{g.group(1)}]" +\ + f"(?P[0-9_][0-9_]*)|" +\ + f"([{g.group(1).lower()}{g.group(1)}]<(?P\\w+)>))" + src = re.sub(r"<([RS])(\w+)>", pattern_transform, src) + + # Replace or , , ... with pattern + def replace_placeholders(src, mnemonic_key, regexp, group_name): + prefix = f"<{mnemonic_key}" + pattern = f"<{mnemonic_key}>" + def pattern_i(i): + return f"<{mnemonic_key}{i}>" + + cnt = src.count(prefix) + if cnt > 1: + for i in range(cnt): + src = re.sub(pattern_i(i), f"(?P<{group_name}{i}>{regexp})", src) + else: + src = re.sub(pattern, f"(?P<{group_name}>{regexp})", src) + + return src + + flaglist = ["eq","ne","cs","hs","cc","lo","mi","pl","vs","vc","hi","ls","ge","lt","gt","le"] + + flag_pattern = '|'.join(flaglist) + dt_pattern = "(?:|2|4|8|16)(?:B|H|S|D|b|h|s|d)" # TODO: Notion of dt can be placed with notion for size in FP instructions + imm_pattern = "#(\\\\w|\\\\s|/| |-|\\*|\\+|\\(|\\)|=|,)+" + index_pattern = "[0-9]+" + width_pattern = "(?:\.w|\.n|)" + barrel_pattern = "(?:lsl|ror|lsr|asr)" + range_pattern = "\{(?P[rs])(?P\\\\d+)-[rs](?P\\\\d+)\}" + + src = re.sub(" ", "\\\\s+", src) + src = re.sub(",", "\\\\s*,\\\\s*", src) + + src = replace_placeholders(src, "imm", imm_pattern, "imm") + src = replace_placeholders(src, "dt", dt_pattern, "datatype") + src = replace_placeholders(src, "index", index_pattern, "index") + src = replace_placeholders(src, "flag", flag_pattern, "flag") # TODO: Are any changes required for IT syntax? 
+ src = replace_placeholders(src, "width", width_pattern, "width") + src = replace_placeholders(src, "barrel", barrel_pattern, "barrel") + src = replace_placeholders(src, "range", range_pattern, "range") + + src = r"\s*" + src + r"\s*(//.*)?\Z" + return src + + @staticmethod + def _build_parser(src): + regexp_txt = Armv7mInstruction._unfold_pattern(src) + regexp = re.compile(regexp_txt) + + def _parse(line): + regexp_result = regexp.match(line) + if regexp_result is None: + raise Instruction.ParsingException(f"Does not match instruction pattern {src}"\ + f"[regex: {regexp_txt}]") + res = regexp.match(line).groupdict() + items = list(res.items()) + for k, v in items: + for l in ["symbol_", "raw_"]: + if k.startswith(l): + del res[k] + if v is None: + continue + k = k[len(l):] + res[k] = v + return res + return _parse + + @staticmethod + def get_parser(pattern): + """Build parser for given AArch64 instruction pattern""" + if pattern in Armv7mInstruction.PARSERS: + return Armv7mInstruction.PARSERS[pattern] + parser = Armv7mInstruction._build_parser(pattern) + Armv7mInstruction.PARSERS[pattern] = parser + return parser + + @cache + @staticmethod + def _infer_register_type(ptrn): + if ptrn[0].upper() in ["R"]: + return RegisterType.GPR + if ptrn[0].upper() in ["S"]: + return RegisterType.FPR + if ptrn[0].upper() in ["T"]: + return RegisterType.HINT + raise FatalParsingException(f"Unknown pattern: {ptrn}") + + def __init__(self, pattern, *, inputs=None, outputs=None, in_outs=None, modifiesFlags=False, + dependsOnFlags=False): + + self.mnemonic = pattern.split(" ")[0] + + if inputs is None: + inputs = [] + if outputs is None: + outputs = [] + if in_outs is None: + in_outs = [] + arg_types_in = [Armv7mInstruction._infer_register_type(r) for r in inputs] + arg_types_out = [Armv7mInstruction._infer_register_type(r) for r in outputs] + arg_types_in_out = [Armv7mInstruction._infer_register_type(r) for r in in_outs] + + if modifiesFlags: + arg_types_out += [RegisterType.FLAGS] 
+ outputs += ["flags"] + + if dependsOnFlags: + arg_types_in += [RegisterType.FLAGS] + inputs += ["flags"] + + super().__init__(mnemonic=pattern, + arg_types_in=arg_types_in, + arg_types_out=arg_types_out, + arg_types_in_out=arg_types_in_out) + + self.inputs = inputs + self.outputs = outputs + self.in_outs = in_outs + + self.pattern = pattern + self.pattern_inputs = list(zip(inputs, arg_types_in, strict=True)) + self.pattern_outputs = list(zip(outputs, arg_types_out, strict=True)) + self.pattern_in_outs = list(zip(in_outs, arg_types_in_out, strict=True)) + + + + @staticmethod + def _to_reg(ty, s): + if ty == RegisterType.GPR: + c = "r" + elif ty == RegisterType.FPR: + c = "s" + elif ty == RegisterType.HINT: + c = "t" + else: + assert False + if s.replace('_','').isdigit(): + return f"{c}{s}" + return s + + @staticmethod + def _build_pattern_replacement(s, ty, arg): + if ty == RegisterType.GPR: + if arg[0] != "r": + return f"{s[0].upper()}<{arg}>" + return s[0].lower() + arg[1:] + if ty == RegisterType.FPR: + if arg[0] != "s": + return f"{s[0].upper()}<{arg}>" + return s[0].lower() + arg[1:] + if ty == RegisterType.HINT: + if arg[0] != "t": + return f"{s[0].upper()}<{arg}>" + return s[0].lower() + arg[1:] + raise FatalParsingException(f"Unknown register type ({s}, {ty}, {arg})") + + @staticmethod + def _instantiate_pattern(s, ty, arg, out): + if ty == RegisterType.FLAGS or ty == RegisterType.HINT: + return out + rep = Armv7mInstruction._build_pattern_replacement(s, ty, arg) + res = out.replace(f"<{s}>", rep) + if res == out: + raise FatalParsingException(f"Failed to replace <{s}> by {rep} in {out}!") + return res + + @staticmethod + def build_core(obj, res): + + def group_to_attribute(group_name, attr_name, f=None): + def f_default(x): + return x + def group_name_i(i): + return f"{group_name}{i}" + if f is None: + f = f_default + if group_name in res.keys(): + setattr(obj, attr_name, f(res[group_name])) + else: + idxs = [ i for i in range(4) if group_name_i(i) in 
res.keys() ] + if len(idxs) == 0: + return + assert idxs == list(range(len(idxs))) + setattr(obj, attr_name, + list(map(lambda i: f(res[group_name_i(i)]), idxs))) + + group_to_attribute('datatype', 'datatype', lambda x: x.lower()) + group_to_attribute('imm', 'immediate', lambda x:x[1:]) # Strip '#' + group_to_attribute('index', 'index', int) + group_to_attribute('flag', 'flag') + group_to_attribute('width', 'width') + group_to_attribute('barrel', 'barrel') + group_to_attribute('range', 'range') + group_to_attribute('range_start', 'range_start', int) + group_to_attribute('range_end', 'range_end', int) + group_to_attribute('range_type', 'range_type') + + for s, ty in obj.pattern_inputs: + if ty == RegisterType.FLAGS: + obj.args_in.append("flags") + else: + obj.args_in.append(Armv7mInstruction._to_reg(ty, res[s])) + for s, ty in obj.pattern_outputs: + if ty == RegisterType.FLAGS: + obj.args_out.append("flags") + else: + obj.args_out.append(Armv7mInstruction._to_reg(ty, res[s])) + + for s, ty in obj.pattern_in_outs: + obj.args_in_out.append(Armv7mInstruction._to_reg(ty, res[s])) + + @staticmethod + def build(c, src): + pattern = getattr(c, "pattern") + inputs = getattr(c, "inputs", []).copy() + outputs = getattr(c, "outputs", []).copy() + in_outs = getattr(c, "in_outs", []).copy() + modifies_flags = getattr(c,"modifiesFlags", False) + depends_on_flags = getattr(c,"dependsOnFlags", False) + + if isinstance(src, str): + # Leave checking the mnemonic out for now; not strictly required + # Allows xxx.w and xxx.n syntax + res = Armv7mInstruction.get_parser(pattern)(src) + else: + assert isinstance(src, dict) + res = src + + obj = c(pattern, inputs=inputs, outputs=outputs, in_outs=in_outs, + modifiesFlags=modifies_flags, dependsOnFlags=depends_on_flags) + + Armv7mInstruction.build_core(obj, res) + return obj + + @classmethod + def make(cls, src): + return Armv7mInstruction.build(cls, src) + + def write(self): + out = self.pattern + l = list(zip(self.args_in, 
self.pattern_inputs)) + \ + list(zip(self.args_out, self.pattern_outputs)) + \ + list(zip(self.args_in_out, self.pattern_in_outs)) + for arg, (s, ty) in l: + out = Armv7mInstruction._instantiate_pattern(s, ty, arg, out) + + def replace_pattern(txt, attr_name, mnemonic_key, t=None): + def t_default(x): + return x + if t is None: + t = t_default + + a = getattr(self, attr_name) + if a is None: + return txt + if not isinstance(a, list): + txt = txt.replace(f"<{mnemonic_key}>", t(a)) + return txt + for i, v in enumerate(a): + txt = txt.replace(f"<{mnemonic_key}{i}>", t(v)) + return txt + + out = replace_pattern(out, "immediate", "imm", lambda x: f"#{x}") + out = replace_pattern(out, "datatype", "dt", lambda x: x.upper()) + out = replace_pattern(out, "flag", "flag") + out = replace_pattern(out, "index", "index", str) + out = replace_pattern(out, "width", "width", lambda x: x.lower()) + out = replace_pattern(out, "barrel", "barrel", lambda x: x.lower()) + out = replace_pattern(out, "range", "range", lambda x: x.lower()) + + out = out.replace("\\[", "[") + out = out.replace("\\]", "]") + return out + +class Armv7mBasicArithmetic(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mShiftedArithmetic(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mMultiplication(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mLogical(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mShiftedLogical(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mLoadInstruction(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mStoreInstruction(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass +class Armv7mFPInstruction(Armv7mInstruction): # pylint: disable=missing-docstring,invalid-name + pass + +# FP +class vmov_gpr(Armv7mFPInstruction): # pylint: 
disable=missing-docstring,invalid-name + pattern = "vmov , " + inputs = ["Sa"] + outputs = ["Rd"] + +class vmov_gpr2(Armv7mFPInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "vmov , " + inputs = ["Ra"] + outputs = ["Sd"] + +class vmov_gpr2_dual(Armv7mFPInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "vmov , , , " + inputs = ["Ra", "Rb"] + outputs = ["Sd1", "Sd2"] + + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.args_out_combinations = [ + ( [0,1], [ [ f"s{i}", f"s{i+1}" ] for i in range(0,len(RegisterType.list_registers(RegisterType.FPR))) ] ) + ] + return obj + +# movs +class movw_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "movw , " + outputs = ["Rd"] + +class movt_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "movt , " + in_outs = ["Rd"] + +# Addition +class add(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "add , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class add_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "add , " + inputs = ["Ra"] + in_outs = ["Rd"] + +class add_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "add , , " + inputs = ["Ra"] + outputs = ["Rd"] + +class add_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "add , " + in_outs = ["Rd"] + +class add_shifted(Armv7mShiftedArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "add , , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class adds(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "adds , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + modifiesFlags=True + +class uadd16(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "uadd16 , , " + inputs = 
["Ra","Rb"] + outputs = ["Rd"] + +class sadd16(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sadd16 , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +# Subtraction +class sub(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sub , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class sub_shifted(Armv7mShiftedArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sub , , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class sub_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sub , " + inputs = ["Ra"] + in_outs = ["Rd"] + +class sub_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sub , " + in_outs = ["Ra"] + +class subs_imm_short(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "subs , " + in_outs = ["Ra"] + modifiesFlags = True + +class usub16(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "usub16 , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class ssub16(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "ssub16 , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +# Multiplication +class mul(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "mul , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class mul_short(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "mul , " + inputs = ["Ra"] + in_outs = ["Rd"] + +class mla(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "mla , , , " + inputs = ["Ra","Rb", "Rc"] + outputs = ["Rd"] + +class mls(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "mls , , , " + inputs = ["Ra","Rb", "Rc"] + outputs = ["Rd"] + +class smulwb(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name 
+ pattern = "smulwb , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class smulwt(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smulwt , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class smultb(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smultb , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class smultt(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smultt , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class smulbb(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smulbb , , " + inputs = ["Ra","Rb"] + outputs = ["Rd"] + +class smlabt(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smlabt , , , " + inputs = ["Ra","Rb", "Rc"] + outputs = ["Rd"] + +class smlabb(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smlabb , , , " + inputs = ["Ra","Rb", "Rc"] + outputs = ["Rd"] + +class smlatt(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smlatt , , , " + inputs = ["Ra","Rb", "Rc"] + outputs = ["Rd"] + +class smull(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smull , , , " + inputs = ["Rc","Rd"] + outputs = ["Ra", "Rb"] + +class smlal(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smlal , , , " + inputs = ["Rc","Rd"] + in_outs = ["Ra", "Rb"] + +class smlad(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smlad , , , " + inputs = ["Rb", "Rc","Rd"] + outputs = ["Ra"] + +class smladx(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smladx , , , " + inputs = ["Rb", "Rc","Rd"] + outputs = ["Ra"] + +class smmulr(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smmulr , , " + inputs = ["Rb","Rc"] + outputs = ["Ra"] + +class 
smuad(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smuad , , " + inputs = ["Rb","Rc"] + outputs = ["Ra"] + +class smuadx(Armv7mMultiplication): # pylint: disable=missing-docstring,invalid-name + pattern = "smuadx , , " + inputs = ["Rb","Rc"] + outputs = ["Ra"] + + +# Logical + +class neg_short(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "neg , " + inputs = ["Ra"] + in_outs = ["Rd"] +class log_and(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "and , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class log_and_shifted(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "and , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class log_or(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "orr , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class log_or_shifted(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "orr , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class eor(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "eor , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class eor_short(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "eor , " + inputs = ["Ra"] + in_outs = ["Rd"] + +class eors(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "eors , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + modifiesFlags = True + +class eors_short(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "eors , " + inputs = ["Ra"] + in_outs = ["Rd"] + modifiesFlags = True + +class eor_shifted(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "eor , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + + def write(self): + self.immediate = simplify(self.immediate) + return super().write() + +class bic(Armv7mLogical): # pylint: 
disable=missing-docstring,invalid-name + pattern = "bic , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class bics(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "bics , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + modifiesFlags = True + +class bic_shifted(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "bic , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class ubfx_imm(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "ubfx , , , " + inputs = ["Ra"] + outputs = ["Rd"] + +class ror(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "ror , , " + inputs = ["Ra"] + outputs = ["Rd"] + +class ror_short(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "ror , " + in_outs = ["Rd"] + +class rors_short(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "rors , " + in_outs = ["Rd"] + modifiesFlags = True + +class lsl(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "lsl , , " + inputs = ["Ra"] + outputs = ["Rd"] + +class asr(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "asr , , " + inputs = ["Ra"] + outputs = ["Rd"] + +class asrs(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "asrs , , " + inputs = ["Ra"] + outputs = ["Rd"] + modifiesFlags = True + +class pkhtb(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "pkhtb , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class pkhbt(Armv7mLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "pkhbt , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +class pkhbt_shifted(Armv7mShiftedLogical): # pylint: disable=missing-docstring,invalid-name + pattern = "pkhbt , , , " + inputs = ["Ra", "Rb"] + outputs = ["Rd"] + +# Load +class ldr(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + 
pattern = "ldr , []" + inputs = ["Ra"] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = 0 + obj.addr = obj.args_in[0] + obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra + return obj + + def write(self): + if int(self.pre_index) != 0: + self.immediate = simplify(self.pre_index) + self.pattern = ldr_with_imm.pattern + return super().write() + +class ldr_with_imm(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldr , [, ]" + inputs = ["Ra"] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = obj.args_in[0] + obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class ldrb_with_imm(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldrb , [, ]" + inputs = ["Ra"] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra + obj.addr = obj.args_in[0] + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class ldrh_with_imm(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldrh , [, ]" + inputs = ["Ra"] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra + obj.addr = obj.args_in[0] + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class ldr_with_imm_stack(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldr 
, [sp, ]" + inputs = [] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = "sp" + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class ldr_with_postinc(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldr , [], " + in_outs = [ "Ra" ] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.args_inout_out_different = [(0,0)] # Can't have Rd==Ra + obj.addr = obj.args_in_out[0] + return obj + +class ldrh_with_postinc(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldrh , [], " + in_outs = [ "Ra" ] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + obj.increment = obj.immediate + obj.args_inout_out_different = [(0,0)] # Can't have Rd==Ra + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class Ldrd(Armv7mLoadInstruction): + pass + +class ldrd_imm(Ldrd): # pylint: disable=missing-docstring,invalid-name + pattern = "ldrd , , [, ]" + in_outs = [ "Rc" ] + outputs = ["Ra", "Rb"] + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = obj.args_in_out[0] + return obj + +class ldrd_with_postinc(Ldrd): # pylint: disable=missing-docstring,invalid-name + pattern = "ldrd , , [], " + in_outs = [ "Rc" ] + outputs = ["Ra", "Rb"] + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class ldr_with_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldr , [, ]!" 
+ in_outs = [ "Ra" ] + outputs = ["Rd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class ldm_interval(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldm , " + inputs = ["Ra"] + outputs = [] + + def write(self): + reg_from = self.args_out[0] + reg_to = self.args_out[-1] + self.range = f"{{{reg_from}-{reg_to}}}" + return super().write() + + + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + reg_type = Armv7mInstruction._infer_register_type(obj.range_type) + num_regs = len(RegisterType.list_registers(reg_type)) + obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads + obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + obj.num_out = len(obj.args_out) + obj.arg_types_out = [RegisterType.GPR] * obj.num_out + obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ] + obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + return obj + +class ldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "ldm !, " + in_outs = ["Ra"] + outputs = [] + + def write(self): + reg_from = self.args_out[0] + reg_to = self.args_out[-1] + self.range = f"{{{reg_from}-{reg_to}}}" + return super().write() + + + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + reg_type = Armv7mInstruction._infer_register_type(obj.range_type) + num_regs = len(RegisterType.list_registers(reg_type)) + obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads + obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + obj.num_out = len(obj.args_out) + 
obj.arg_types_out = [RegisterType.GPR] * obj.num_out + obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ] + obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + return obj + +class vldr_with_imm(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "vldr , [, ]" + inputs = ["Ra"] + outputs = ["Sd"] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = obj.args_in[0] + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + + +class vldr_with_postinc(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "vldr , [], " + in_outs = ["Ra"] + outputs = ["Sd"] + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class vldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "vldm !, " + in_outs = ["Ra"] + outputs = [] + def write(self): + reg_from = self.args_out[0] + reg_to = self.args_out[-1] + self.range = f"{{{reg_from}-{reg_to}}}" + return super().write() + + + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + reg_type = Armv7mInstruction._infer_register_type(obj.range_type) + num_regs = len(RegisterType.list_registers(reg_type)) + obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads + obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + obj.num_out = len(obj.args_out) + obj.arg_types_out = [RegisterType.FPR] * obj.num_out + obj.args_out_restrictions = [[ f"s{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, 
obj.num_out) ] + obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"s{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + return obj +# Store + +class str_no_off(Armv7mStoreInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "str , []" + inputs = ["Ra", "Rd"] + outputs = [] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = 0 + obj.addr = obj.args_in[0] + return obj + + def write(self): + if int(self.pre_index) != 0: + self.immediate = simplify(self.pre_index) + self.pattern = str_with_imm.pattern + return super().write() + +class strh_with_imm(Armv7mStoreInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "strh , [, ]" + inputs = ["Ra", "Rd"] + outputs = [] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = obj.args_in[0] + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class str_with_imm(Armv7mStoreInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "str , [, ]" + inputs = ["Ra", "Rd"] + outputs = [] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = obj.args_in[0] + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class str_with_imm_stack(Armv7mStoreInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "str , [sp, ]" + inputs = ["Rd"] + outputs = [] + @classmethod + def make(cls, src): + obj = Armv7mInstruction.build(cls, src) + obj.increment = None + obj.pre_index = obj.immediate + obj.addr = "sp" + return obj + + def write(self): + self.immediate = simplify(self.pre_index) + return super().write() + +class str_with_postinc(Armv7mStoreInstruction): # pylint: 
disable=missing-docstring,invalid-name + pattern = "str , [], " + inputs = ["Rd"] + in_outs = ["Ra"] + @classmethod + def make(cls, src): + obj = Armv7mStoreInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class strh_with_postinc(Armv7mStoreInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "strh , [], " + inputs = ["Rd"] + in_outs = ["Ra"] + @classmethod + def make(cls, src): + obj = Armv7mStoreInstruction.build(cls, src) + obj.increment = obj.immediate + obj.pre_index = None + obj.addr = obj.args_in_out[0] + return obj + +class stm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name + pattern = "stm !, " # NOTE(review): stm is a store, yet this class derives from Armv7mLoadInstruction - confirm this is intended + in_outs = ["Ra"] + outputs = [] + + def write(self): + reg_from = self.args_in[0] + reg_to = self.args_in[-1] + self.range = f"{{{reg_from}-{reg_to}}}" + return super().write() + + @classmethod + def make(cls, src): + obj = Armv7mLoadInstruction.build(cls, src) + reg_type = Armv7mInstruction._infer_register_type(obj.range_type) + num_regs = len(RegisterType.list_registers(reg_type)) + obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized stores + obj.args_in = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + obj.num_in = len(obj.args_in) + obj.arg_types_in = [RegisterType.GPR] * obj.num_in + obj.args_in_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_in)] for i in range(0, obj.num_in) ] + obj.args_in_combinations = [ ( list(range(0, obj.num_in)), [ [ f"r{i+j}" for i in range(0, obj.num_in)] for j in range(0, num_regs-obj.num_in) ] )] + return obj +# Other +class cmp(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "cmp , " + inputs = ["Ra", "Rb"] + modifiesFlags=True + dependsOnFlags=True + +class cmp_imm(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "cmp , " + inputs = ["Ra"] +
modifiesFlags=True + +class Spill: # NOTE(review): spill/restore take no self and are not staticmethods - call them on the class (Spill.spill(...)); an instance call would bind the instance to reg + def spill(reg, loc, spill_to_vreg=None): + """Generates the instruction text for a spill to either + the stack or the FPR. If spill_to_vreg is None (default), + the spill goes to the stack. Otherwise, spill_to_vreg must + be an integer defining the base of the registers in the FPR + which should be used as a stack. For example, passing 8 would + spill to s8,s9,.. .""" + if spill_to_vreg is None: + return f"str {reg}, [sp, #STACK_LOC_{loc}]" + else: + vreg_base = int(spill_to_vreg) + return f"vmov s{vreg_base+int(loc)}, {reg}" + def restore(reg, loc, spill_to_vreg=None): + """Generates the instruction text for a spill restore from either + the stack or the FPR. If spill_to_vreg is None (default), + the value is reloaded from the stack. Otherwise, spill_to_vreg must + be an integer defining the base of the registers in the FPR + which should be used as a stack. For example, passing 8 would + restore from s8,s9,.. .""" + if spill_to_vreg is None: + return f"ldr {reg}, [sp, #STACK_LOC_{loc}]" + else: + vreg_base = int(spill_to_vreg) + return f"vmov {reg}, s{vreg_base+int(loc)}" + +def ldm_interval_splitting_cb(): # Builds the callback that replaces an ldm by one ldr_with_imm per destination register (offsets 0, 4, 8, ...) + def core(inst,t,log=None): + + ptr = inst.args_in[0] + regs = inst.args_out + width = inst.width + + ldrs = [] + offset = 0 + for r in regs: + ldr = Armv7mInstruction.build( + ldr_with_imm, {"width": width, "Rd": r, "Ra": ptr, "imm": f"#{offset}"}) + ldr.pre_index = offset + ldrs.append(ldr) + offset += 4 + + ldr_src = SourceLine(ldr.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + ldr.source_line = ldr_src + + if log is not None: + log(f"ldm splitting: {t.inst}; {[ldr for ldr in ldrs]}") + + t.changed = True + t.inst = ldrs + return True + + return core + +ldm_interval.global_fusion_cb = ldm_interval_splitting_cb() + +def stm_interval_inc_writeback_splitting_cb(): + def core(inst,t,log=None): + + ptr = inst.args_in_out[0] + regs = inst.args_in + width = inst.width + + strs = [] + offset =
(len(regs) - 1) * 4 + for r in regs[:0:-1]: + store = Armv7mInstruction.build( + str_with_imm, {"width": width, "Rd": r, "Ra": ptr, "imm": f"#{offset}"}) + store.pre_index = offset + strs.append(store) + offset -= 4 + # Final store includes increment + store = Armv7mInstruction.build( + str_with_postinc, {"width": width, "Rd": regs[0], "Ra": ptr, "imm": f"#{len(regs) * 4}"}) + strs.append(store) + + for store in strs: + store_src = SourceLine(store.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + store.source_line = store_src + + if log is not None: + log(f"stm! splitting: {t.inst}; {[store for store in strs]}") + + t.changed = True + t.inst = strs + return True + + return core + +stm_interval_inc_writeback.global_fusion_cb = stm_interval_inc_writeback_splitting_cb() + +def ldm_interval_inc_writeback_splitting_cb(): + def core(inst,t,log=None): + + ptr = inst.args_in_out[0] + regs = inst.args_out + width = inst.width + + ldrs = [] + offset = (len(regs) - 1) * 4 + for r in regs[:0:-1]: + ldr = Armv7mInstruction.build( + ldr_with_imm, {"width": width, "Rd": r, "Ra": ptr, "imm": f"#{offset}"}) + ldr.pre_index = offset + ldrs.append(ldr) + offset -= 4 + # Final load includes increment + ldr = Armv7mInstruction.build( + ldr_with_postinc, {"width": width, "Rd": regs[0], "Ra": ptr, "imm": f"#{len(regs) * 4}"}) + ldrs.append(ldr) + + for ldr in ldrs: + ldr_src = SourceLine(ldr.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + ldr.source_line = ldr_src + + if log is not None: + log(f"ldm! 
splitting: {t.inst}; {[ldr for ldr in ldrs]}") + + t.changed = True + t.inst = ldrs + return True + + return core + +ldm_interval_inc_writeback.global_fusion_cb = ldm_interval_inc_writeback_splitting_cb() + +def vldm_interval_inc_writeback_splitting_cb(): # Builds the callback that splits vldm! into one vldr_with_imm per register plus an add_imm advancing the base pointer + def core(inst,t,log=None): + + ptr = inst.args_in_out[0] + regs = inst.args_out + width = inst.width + + ldrs = [] + offset = 0 + for r in regs: + ldr = Armv7mInstruction.build( + vldr_with_imm, {"width": width, "Sd": r, "Ra": ptr, "imm": f"#{offset}"}) + ldr.pre_index = offset + ldrs.append(ldr) + offset += 4 + + add_ptr = Armv7mInstruction.build( + add_imm, {"width": width, "Rd": ptr, "Ra": ptr, "imm": f"#{offset}"}) + ldrs.append(add_ptr) + + for ldr in ldrs: + ldr_src = SourceLine(ldr.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + ldr.source_line = ldr_src + + if log is not None: + log(f"vldm! splitting: {t.inst}; {[ldr for ldr in ldrs]}") + + t.changed = True + t.inst = ldrs + return True + + return core + +vldm_interval_inc_writeback.global_fusion_cb = vldm_interval_inc_writeback_splitting_cb() + +def ldrd_postinc_splitting_cb(): # Builds the callback that splits a post-incrementing ldrd into ldr_with_imm (#4, second register) and ldr_with_postinc (#8, first register) + def core(inst,t,log=None): + + ptr = inst.args_in_out[0] + regs = inst.args_out + width = inst.width + + ldrs = [] + + ldr = Armv7mInstruction.build( + ldr_with_imm, {"width": width, "Rd": regs[1], "Ra": ptr, "imm": "#4"}) + ldr.pre_index = 4 + ldrs.append(ldr) + # Final load includes increment + ldr = Armv7mInstruction.build( + ldr_with_postinc, {"width": width, "Rd": regs[0], "Ra": ptr, "imm": "#8"}) + ldr.increment = 8 + ldr.pre_index = None + ldr.addr = ptr + ldrs.append(ldr) + + for ldr in ldrs: + ldr_src = SourceLine(ldr.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + ldr.source_line = ldr_src + + if log is not None: + log(f"ldrd splitting: {t.inst}; {[ldr for ldr in ldrs]}") + + t.changed = True + t.inst = ldrs + return True + + return core + +ldrd_with_postinc.global_fusion_cb =
ldrd_postinc_splitting_cb() + + + +def ldrd_imm_splitting_cb(): + def core(inst,t,log=None): + + ptr = inst.args_in_out[0] + regs = inst.args_out + width = inst.width + + ldrs = [] + + ldr = Armv7mInstruction.build( + ldr_with_imm, {"width": width, "Rd": regs[0], "Ra": ptr, "imm": inst.pre_index }) + ldr.pre_index = inst.pre_index + ldrs.append(ldr) + # Final load includes increment + ldr = Armv7mInstruction.build( + ldr_with_imm, {"width": width, "Rd": regs[1], "Ra": ptr, "imm": f"{inst.pre_index}+4"}) + ldr.pre_index = f"{inst.pre_index}+4" + ldr.addr = ptr + ldrs.append(ldr) + + for ldr in ldrs: + ldr_src = SourceLine(ldr.write()).\ + add_tags(inst.source_line.tags).\ + add_comments(inst.source_line.comments) + ldr.source_line = ldr_src + + if log is not None: + log(f"ldrd splitting: {t.inst}; {[ldr for ldr in ldrs]}") + + t.changed = True + t.inst = ldrs + return True + + return core + +ldrd_imm.global_fusion_cb = ldrd_imm_splitting_cb() + +# Returns the list of all subclasses of a class which don't have +# subclasses themselves +def all_subclass_leaves(c): + + def has_subclasses(cl): + return len(cl.__subclasses__()) > 0 + def is_leaf(c): + return not has_subclasses(c) + + def all_subclass_leaves_core(leaf_lst, todo_lst): + leaf_lst += filter(is_leaf, todo_lst) + todo_lst = [ csub + for c in filter(has_subclasses, todo_lst) + for csub in c.__subclasses__() ] + if len(todo_lst) == 0: + return leaf_lst + return all_subclass_leaves_core(leaf_lst, todo_lst) + + return all_subclass_leaves_core([], [c]) + +Instruction.all_subclass_leaves = all_subclass_leaves(Instruction) + +def iter_armv7m_instructions(): + yield from all_subclass_leaves(Instruction) + +def find_class(src): + for inst_class in iter_armv7m_instructions(): + if isinstance(src,inst_class): + return inst_class + raise UnknownInstruction(f"Couldn't find instruction class for {src} (type {type(src)})") + +def lookup_multidict(d, inst, default=None): + instclass = find_class(inst) + for l,v in d.items(): 
+ # Multidict entries can be the following: + # - An instruction class. It matches any instruction of that class. + # - A callable. It matches any instruction returning `True` when passed + # to the callable. + # - A tuple of instruction classes or callables. It matches any instruction + # which matches at least one element in the tuple. + def match(x): + if inspect.isclass(x): + return isinstance(inst, x) + assert callable(x) + return x(inst) + if not isinstance(l, tuple): + l = [l] + for lp in l: + if match(lp): + return v + if default is None: + raise UnknownInstruction(f"Couldn't find {instclass} for {inst}") + return default diff --git a/slothy/targets/arm_v7m/cortex_m4.py b/slothy/targets/arm_v7m/cortex_m4.py new file mode 100644 index 00000000..6a573411 --- /dev/null +++ b/slothy/targets/arm_v7m/cortex_m4.py @@ -0,0 +1,75 @@ + +""" +Experimental Cortex-M4 microarchitecture model for SLOTHY + +WARNING: The data in this module is approximate and may contain errors. +""" + +################################### NOTE ############################################### +### ### +### WARNING: The data in this module is approximate and may contain errors. ### +### They are _NOT_ an official software optimization guide for Cortex-M4. ### +### ### +######################################################################################## + +from enum import Enum +from slothy.targets.arm_v7m.arch_v7m import * + +issue_rate = 1 +llvm_mca_target = "cortex-m4" + +class ExecutionUnit(Enum): + """Enumeration of execution units in Cortex-M4 model""" + UNIT=0 + def __repr__(self): + return self.name + +# Opaque function called by SLOTHY to add further microarchitecture- +# specific constraints which are not encapsulated by the general framework. +def add_further_constraints(slothy): + if slothy.config.constraints.functional_only: + return + +# Opaque function called by SLOTHY to add further microarchitecture- +# specific objectives. 
+def has_min_max_objective(config): + """Report whether the Cortex-M4 model adds a min/max objective (it does not).""" + _ = config + return False +def get_min_max_objective(slothy): + _ = slothy + return + +execution_units = { + (adds, add, add_short, add_imm, add_imm_short, add_shifted, sub_shifted, sub_imm_short, mul, smull, smlal, log_and, log_or, eor, eor_shifted, bic, bic_shifted, ror, ldr_with_imm, str_with_imm): ExecutionUnit.UNIT, +} + +inverse_throughput = { + ( adds, add, add_short, add_imm, add_imm_short, add_shifted, sub_shifted, sub_imm_short, mul, smull, smlal, log_and, log_or, eor, eor_shifted, bic, bic_shifted, ror ) : 1, + (ldr_with_imm, str_with_imm) : 2} + +default_latencies = { + (adds, add, add_short, add_imm, add_imm_short, add_shifted, sub_shifted, sub_imm_short, mul, smull, smlal, log_and, log_or, eor, eor_shifted, bic, bic_shifted, ror): 1, + (ldr_with_imm, str_with_imm) : 2 +} + +def get_latency(src, out_idx, dst): + _ = out_idx # out_idx unused + + instclass_src = find_class(src) + instclass_dst = find_class(dst) + + latency = lookup_multidict( + default_latencies, src) + + return latency + +def get_units(src): + units = lookup_multidict(execution_units, src) + if isinstance(units,list): + return units + return [units] + +def get_inverse_throughput(src): + return lookup_multidict( + inverse_throughput, src) diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py new file mode 100644 index 00000000..f6c2ac2c --- /dev/null +++ b/slothy/targets/arm_v7m/cortex_m7.py @@ -0,0 +1,353 @@ +""" +Experimental Cortex-M7 microarchitecture model for SLOTHY + +WARNING: The data in this module is approximate and may contain errors. +""" + +################################### NOTE ############################################### +### ### +### WARNING: The data in this module is approximate and may contain errors. ### +### They are _NOT_ an official software optimization guide for Cortex-M7.
### +### ### +### Sources used in constructing this model: ### +### - ARMv7-M Architecture Reference Manual (ARM DDI 0403E.e) ### +### - https://github.com/jnk0le/random/tree/master/pipeline%20cycle%20test#cortex-m7 ### +### - https://www.quinapalus.com/cm7cycles.html ### +######################################################################################## + +from enum import Enum +from itertools import product +from slothy.targets.arm_v7m.arch_v7m import * +import re +from sympy import simplify + +issue_rate = 2 +llvm_mca_target = "cortex-m7" + + +class ExecutionUnit(Enum): + """Enumeration of execution units in Cortex-M7 model""" + + STORE = 0 + ALU0 = 1 + ALU1 = 2 + MAC = 5 + FPU = 6 + LOAD0 = 7 + LOAD1 = 8 + SIMD = 9 + + def __repr__(self): + return self.name + def ALU(): # pylint: disable=invalid-name + return [ExecutionUnit.ALU0, ExecutionUnit.ALU1] + def SHIFT(): # pylint: disable=invalid-name + return [ExecutionUnit.SHIFT0, ExecutionUnit.SHIFT1] + def LOAD(): # pylint: disable=invalid-name + return [ExecutionUnit.LOAD0, ExecutionUnit.LOAD1] + + +# Opaque function called by SLOTHY to add further microarchitecture- +# specific constraints which are not encapsulated by the general framework. 
+def add_further_constraints(slothy): + if slothy.config.constraints.functional_only: + return + # add_slot_constraints(slothy) + add_st_hazard(slothy) + + add_dsp_slot_constraint(slothy) + +def add_dsp_slot_constraint(slothy): + slothy.restrict_slots_for_instructions_by_class( + [pkhbt, pkhtb, pkhbt_shifted, ubfx_imm, uadd16, usub16, sadd16, ssub16], [0]) + +# TODO: this seems incorrect +def add_slot_constraints(slothy): + slothy.restrict_slots_for_instructions_by_class( + [str_with_imm, str_with_imm_stack, str_with_postinc, strh_with_imm, + strh_with_postinc, stm_interval_inc_writeback, str_no_off, str], [1]) + + +def add_st_hazard(slothy): + def is_st_ld_pair(inst_a, inst_b): + return (isinstance(inst_a.inst, ldr_with_imm) or isinstance(inst_a.inst, ldr_with_imm_stack)) \ + and (isinstance(inst_b.inst, str_with_imm) or isinstance(inst_b.inst, str_with_imm_stack)) + + def evaluate_immediate(string_expr): + if string_expr is None: + return 0 + string_expr = str(string_expr) + return int(simplify(string_expr)) + + for t_load, t_store in slothy.get_inst_pairs(cond=is_st_ld_pair): + if t_load.is_locked and t_store.is_locked: + continue + + ldr_imm = evaluate_immediate(t_load.inst.immediate) + str_imm = evaluate_immediate(t_store.inst.immediate) + + if abs(ldr_imm - str_imm) >= 8: + continue + + ldr_before_str = slothy._NewBoolVar("") + ldr_after_str = slothy._NewBoolVar("") + slothy._AddExactlyOne([ldr_before_str, ldr_after_str]) + slothy._Add(t_load.program_start_var < t_store.program_start_var).OnlyEnforceIf(ldr_before_str) + slothy._Add(t_load.program_start_var >= t_store.program_start_var + 8).OnlyEnforceIf(ldr_after_str) + + +# Opaque function called by SLOTHY to add further microarchitecture- +# specific objectives. 
+def has_min_max_objective(config): + _ = config + return False + + +def get_min_max_objective(slothy): + _ = slothy + return + + +execution_units = { + ( + ldr, + ldr_with_imm, + ldr_with_imm_stack, + ldr_with_inc_writeback, + ldr_with_postinc, + ldrb_with_imm, + ldrh_with_imm, + ldrh_with_postinc, + vldr_with_imm, vldr_with_postinc # TODO: also FPU? + ): ExecutionUnit.LOAD(), + ( + Ldrd, + ldm_interval, + ldm_interval_inc_writeback, + vldm_interval_inc_writeback): [ExecutionUnit.LOAD()], + ( + str_with_imm, + str_with_imm_stack, + str_with_postinc, + str_no_off, + strh_with_imm, + strh_with_postinc, + stm_interval_inc_writeback + ): [[ExecutionUnit.STORE, ExecutionUnit.MAC]], + ( + movw_imm, + movt_imm, + adds, + add, + add_short, + add_imm, + add_imm_short, + sub, subs_imm_short, sub_imm_short, + neg_short, + log_and, + log_or, + eor, eor_short, eors, eors_short, + bic, bics, + cmp, cmp_imm, + ): ExecutionUnit.ALU(), + (ror, ror_short, rors_short, lsl, asr, asrs): [[ExecutionUnit.ALU0], [ExecutionUnit.ALU1]], + (mul, mul_short, smull, smlal, mla, mls, smulwb, smulwt, smultb, smultt, + smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr): [ExecutionUnit.MAC], + (vmov_gpr, vmov_gpr2, vmov_gpr2_dual): [ExecutionUnit.FPU], + (uadd16, sadd16, usub16, ssub16): list(map(list, product(ExecutionUnit.ALU(), [ExecutionUnit.SIMD]))), + (pkhbt, pkhtb, pkhbt_shifted, ubfx_imm): [[ExecutionUnit.ALU0, ExecutionUnit.SIMD]], + (Armv7mShiftedArithmetic): [[ExecutionUnit.ALU0]], + (Armv7mShiftedLogical): [[ExecutionUnit.ALU0]], +} +inverse_throughput = { + ( + ldr, + ldr_with_imm, + ldr_with_imm_stack, + ldr_with_inc_writeback, + ldr_with_postinc, + Ldrd, + ldrb_with_imm, + ldrh_with_imm, + ldrh_with_postinc, + vldr_with_imm, vldr_with_postinc, # TODO: double-check + # actually not, just placeholder + ldm_interval, ldm_interval_inc_writeback, vldm_interval_inc_writeback, + movw_imm, + movt_imm, + adds, + add, + add_short, + add_imm, + add_imm_short, + add_shifted, 
+ sub_shifted, + sub_imm_short, + subs_imm_short, + uadd16, sadd16, usub16, ssub16, + mul, mul_short, + smull, + smlal, + mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr, + neg_short, + log_and, log_and_shifted, + log_or, log_or_shifted, + eor, eor_short, eors, eors_short, + eor_shifted, + bic, bics, + bic_shifted, + ror, ror_short, rors_short, lsl, asr, asrs, + cmp, cmp_imm, + vmov_gpr, + vmov_gpr2, vmov_gpr2_dual, # verify for dual + pkhbt, pkhtb, pkhbt_shifted, ubfx_imm, + str_with_imm, + str_with_imm_stack, + str_with_postinc, + str_no_off, + strh_with_imm, + strh_with_postinc, + + ): 1, + ( + stm_interval_inc_writeback, # actually not, just placeholder + vmov_gpr2_dual): 2 +} + +default_latencies = { + ( + movw_imm, + movt_imm, + adds, + add, + add_short, + add_imm, + add_imm_short, + add_shifted, + sub_shifted, + sub_imm_short, + subs_imm_short, + uadd16, sadd16, usub16, ssub16, + neg_short, + log_and, log_and_shifted, + log_or, log_or_shifted, + eor, eor_short, eors, eors_short, + bic, bics, + bic_shifted, + ror, ror_short, rors_short, lsl, asr, asrs, + cmp, cmp_imm, + pkhbt, pkhtb, pkhbt_shifted, ubfx_imm, + vldr_with_imm, vldr_with_postinc, # according to Jan + # actually not, just placeholder + ldm_interval, ldm_interval_inc_writeback, vldm_interval_inc_writeback, + str_with_imm, + str_with_imm_stack, + str_with_postinc, + str_no_off, + strh_with_imm, + strh_with_postinc, + ): 1, + ( + mul, mul_short, + smull, + smlal, + mla, mls, smulwb, smulwt, smultb, smultt, smulbb, smlabt, smlabb, smlatt, smlad, smladx, smuad, smuadx, smmulr, + # TODO: Verify load latency + stm_interval_inc_writeback, # actually not, just placeholder + ldr, + ldr_with_imm, + ldr_with_imm_stack, + ldr_with_inc_writeback, + ldr_with_postinc, + ldrb_with_imm, + ldrh_with_imm, + ldrh_with_postinc, + eor_shifted + ): 2, + (Ldrd): 3, + (vmov_gpr2, vmov_gpr2_dual): 3, + (vmov_gpr): 1 +} + + +def get_latency(src, out_idx, dst): + 
_ = out_idx # out_idx unused + + instclass_src = find_class(src) + instclass_dst = find_class(dst) + + latency = lookup_multidict(default_latencies, src) + + # Forwarding path to MAC instructions + if instclass_dst in [mla, mls, smlabb, smlabt, smlatt] and src.args_out[0] == dst.args_in[2]: + latency = latency - 1 + + if instclass_dst in [smlal] and \ + (src.args_out[0] == dst.args_in_out[0] or src.args_out[0] == dst.args_in_out[1]): + latency = latency - 1 + + # Multiply accumulate chain latency is 1 + if instclass_src in [smlal] and instclass_dst in [smlal] and \ + src.args_in_out[0] == dst.args_in_out[0] and \ + src.args_in_out[1] == dst.args_in_out[1]: + return 1 + + # Load latency is 1 cycle if the destination is an arithmetic/logical instruction + if instclass_src in [ldr_with_imm, ldr_with_imm_stack, ldr_with_inc_writeback] and \ + sum([issubclass(instclass_dst, pc) for pc in [Armv7mBasicArithmetic, Armv7mLogical]]) and \ + src.args_out[0] in dst.args_in: + latency = latency - 1 + + # Shifted operand needs to be available one cycle early + # TODO: verify how this applies to ubfx with imm + if sum([issubclass(instclass_dst, pc) for pc in [Armv7mShiftedLogical, Armv7mShiftedArithmetic, pkhbt, pkhtb, pkhbt_shifted]]) and \ + dst.args_in[1] in src.args_out or \ + sum([issubclass(instclass_dst, pc) for pc in [ubfx_imm]]) and \ + dst.args_in[0] in src.args_out: + return latency + 1 + + + # Load and store multiples take a long time to complete + if instclass_src in [ldm_interval, ldm_interval_inc_writeback, stm_interval_inc_writeback, vldm_interval_inc_writeback]: + latency = (src.range_end - src.range_start) + 1 + + # Can always store result in the same cycle + # TODO: double-check this + if dst.is_store(): + return 0 + + return latency + + +def get_units(src): + units = lookup_multidict(execution_units, src) + + + def evaluate_immediate(string_expr): + if string_expr is None: + return 0 + string_expr = str(string_expr) + return int(simplify(string_expr)) + + # 
The Cortex-M7 has two memory banks + # If two loads use the same memory bank, they cannot dual issue + # There are no constraints which load can go to which issue slot + # Approximation: Only look at immediates, i.e., assume all pointers are aligned to 8 bytes + if src.is_ldr(): + imm = evaluate_immediate(src.immediate) + + if (imm % 8) // 4 == 0: + return [ExecutionUnit.LOAD0] + else: + return [ExecutionUnit.LOAD1] + + if isinstance(units, list): + return units + return [units] + +def get_inverse_throughput(src): + itp = lookup_multidict(inverse_throughput, src) + if find_class(src) in [ldm_interval, ldm_interval_inc_writeback, stm_interval_inc_writeback, vldm_interval_inc_writeback]: + itp = (src.range_end - src.range_start) + 1 + + return itp diff --git a/slothy/targets/query.py b/slothy/targets/query.py index 7e4b27c6..dbebf7a7 100644 --- a/slothy/targets/query.py +++ b/slothy/targets/query.py @@ -30,6 +30,9 @@ and microarchitecture models for SLOTHY. """ +from slothy.targets.arm_v7m import arch_v7m as Arch_Armv7M +from slothy.targets.arm_v7m import cortex_m7 as Target_CortexM7 + from slothy.targets.arm_v81m import arch_v81m as Arch_Armv81M from slothy.targets.arm_v81m import cortex_m55r1 as Target_CortexM55r1 from slothy.targets.arm_v81m import cortex_m85r1 as Target_CortexM85r1 @@ -50,12 +53,14 @@ class UnknownTarget(Exception): class Archery: """This is a small helper class for querying architectures""" - _archs = { "Arm_v81M" : Arch_Armv81M, + _archs = { "Arm_v7M" : Arch_Armv7M, + "Arm_v81M" : Arch_Armv81M, "Arm_AArch64" : AArch64_Neon } _targets = { "Arm_Cortex_M55" : Target_CortexM55r1, "Arm_Cortex_M85" : Target_CortexM85r1, "Arm_Helium_Experimental" : Target_Helium_Experimental, + "Arm_Cortex_M7" : Target_CortexM7, "Arm_Cortex_A55" : Target_CortexA55, "Arm_Cortex_A72_frontend" : Target_CortexA72_Frontend, "Arm_Neoverse_N1_experimental" : Target_NeoverseN1_Experimental, From a8a4fbb3731869b2534bd0b866bd7877bcc25cb2 Mon Sep 17 00:00:00 2001 From: Amin
Abdulrahman Date: Mon, 2 Dec 2024 12:05:30 +0100 Subject: [PATCH 13/16] Armv7m: Remove stack register types --- slothy/targets/arm_v7m/arch_v7m.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 43154983..0e9eb014 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -14,10 +14,8 @@ class RegisterType(Enum): GPR = 1 FPR = 2 - STACK_FPR = 3 - STACK_GPR = 4 - FLAGS = 5 - HINT = 6 + FLAGS = 3 + HINT = 4 def __str__(self): return self.name @@ -34,9 +32,6 @@ def spillable(reg_type): def list_registers(reg_type, only_extra=False, only_normal=False, with_variants=False): """Return the list of all registers of a given type""" - stack_locations = [ f"STACK{i}" for i in range(8) ] - fpstack_locations = [ f"STACK{i}" for i in range(8) ] - gprs_normal = [ f"r{i}" for i in range(15) ] fprs_normal = [ f"s{i}" for i in range(31) ] @@ -59,9 +54,7 @@ def list_registers(reg_type, only_extra=False, only_normal=False, with_variants= fprs += fprs_extra return { RegisterType.GPR : gprs, - RegisterType.STACK_GPR : stack_locations, RegisterType.FPR : fprs, - RegisterType.STACK_FPR : fpstack_locations, RegisterType.HINT : hints, RegisterType.FLAGS : flags}[reg_type] @@ -89,9 +82,7 @@ def is_renamed(ty): def from_string(string): """Find registe type from string""" string = string.lower() - return { "fprstack" : RegisterType.STACK_FPR, - "stack" : RegisterType.STACK_GPR, - "fpr" : RegisterType.FPR, + return { "fpr" : RegisterType.FPR, "gpr" : RegisterType.GPR, "hint" : RegisterType.HINT, "flags" : RegisterType.FLAGS}.get(string,None) From 00b2a6fd4499344504b89184659899fe1c6343d9 Mon Sep 17 00:00:00 2001 From: "Matthias J. 
Kannwischer" Date: Thu, 28 Nov 2024 16:03:39 +0800 Subject: [PATCH 14/16] remove new_fixup from software pipelining for M7 --- slothy/targets/arm_v7m/arch_v7m.py | 54 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 0e9eb014..1ce30e4f 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -154,25 +154,27 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, logging.debug(f"Loop counter {loop_cnt} is incremented by {inc_per_iter} per iteration") # Check whether instructions modifying the loop count moved to # pre/postamble and adjust the fixup based on that. - new_fixup = 0 - if postamble_code is not None: - new_fixup = 0 - for l in postamble_code: - if l.text == "": - continue - inst = Instruction.parser(l) - if loop_cnt in inst[0].args_in_out and inst[0].increment is not None: - new_fixup = new_fixup + simplify(inst[0].increment) - - if new_fixup != 0 or fixup != 0: + # new_fixup = 0 + # if postamble_code is not None: + # new_fixup = 0 + # for l in postamble_code: + # if l.text == "": + # continue + # inst = Instruction.parser(l) + # if loop_cnt in inst[0].args_in_out and inst[0].increment is not None: + # new_fixup = new_fixup + simplify(inst[0].increment) + + # if new_fixup != 0 or fixup != 0: + if fixup != 0: yield f"{indent}push {{{self.additional_data['end']}}}" yield f"{indent}vmov {self.additional_data['end']}, {self.additional_data['endf']}" - if new_fixup != 0: - yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" + # if new_fixup != 0: + # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" - if new_fixup != 0 or fixup != 0: + #if new_fixup != 0 or fixup != 0: + if 
fixup != 0: yield f"{indent}vmov {self.additional_data['endf']}, {self.additional_data['end']}" yield f"{indent}pop {{{self.additional_data['end']}}}" if jump_if_empty is not None: @@ -221,18 +223,18 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, # Check whether instructions modifying the loop count moved to # pre/postamble and adjust the fixup based on that. - new_fixup = 0 - if postamble_code is not None: - new_fixup = 0 - for l in postamble_code: - if l.text == "": - continue - inst = Instruction.parser(l) - if loop_cnt in inst[0].args_in_out and inst[0].increment is not None: - new_fixup = new_fixup + simplify(inst[0].increment) - - if new_fixup != 0: - yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" + # new_fixup = 0 + # if postamble_code is not None: + # new_fixup = 0 + # for l in postamble_code: + # if l.text == "": + # continue + # inst = Instruction.parser(l) + # if loop_cnt in inst[0].args_in_out and inst[0].increment is not None: + # new_fixup = new_fixup + simplify(inst[0].increment) + + # if new_fixup != 0: + # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" From 528321cb4deb9d4941cb2697894e208f85e3a58b Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Mon, 2 Dec 2024 13:46:28 +0100 Subject: [PATCH 15/16] Armv7m: Add vmov cmp loop example --- example.py | 18 ++++++++++ examples/opt/armv7m/armv7m_simple0_opt_m7.s | 38 ++++++++++----------- examples/opt/armv7m/loop_vmov_cmp_opt_m7.s | 31 +++++++++++++++++ 3 files changed, 68 insertions(+), 19 deletions(-) create mode 100644 examples/opt/armv7m/loop_vmov_cmp_opt_m7.s diff --git a/example.py b/example.py index 6e38f856..9f2a360e 100644 --- a/example.py +++ b/example.py @@ -688,6 +688,23 @@ def core(self,slothy): slothy.config.variable_size=True 
slothy.config.outputs = ["r6"] slothy.optimize_loop("start") + +class Armv7mLoopVmovCmp(Example): + def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): + name = "loop_vmov_cmp" + infile = name + + if var != "": + name += f"_{var}" + infile += f"_{var}" + name += f"_{target_label_dict[target]}" + + super().__init__(infile, name, rename=True, arch=arch, target=target) + + def core(self,slothy): + slothy.config.variable_size=True + slothy.config.outputs = ["r6"] + slothy.optimize_loop("start") class ntt_kyber_123_4567(Example): def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None): @@ -1486,6 +1503,7 @@ def main(): LoopLe(), Armv7mLoopSubs(), Armv7mLoopCmp(), + Armv7mLoopVmovCmp(), CRT(), diff --git a/examples/opt/armv7m/armv7m_simple0_opt_m7.s b/examples/opt/armv7m/armv7m_simple0_opt_m7.s index 87fd5e61..98abb396 100644 --- a/examples/opt/armv7m/armv7m_simple0_opt_m7.s +++ b/examples/opt/armv7m/armv7m_simple0_opt_m7.s @@ -1,24 +1,24 @@ start: - // Instructions: 6 - // Expected cycles: 5 - // Expected IPC: 1.20 - // - // Cycle bound: 5.0 - // IPC bound: 1.20 - // - // Wall time: 0.01s - // User time: 0.01s - // - // ----- cycle (expected) ------> - // 0 25 - // |------------------------|---- - ldr r7, [r0, #4] // *............................. - add r7, r2, r7 // .*............................ - eor.w r12, r7, r3 // ..*........................... - smlabt r12, r2, r2, r12 // ..*........................... - asrs r3, r12, #1 // ....*......................... - str r3, [r0, #4] // ....*......................... + // Instructions: 6 + // Expected cycles: 5 + // Expected IPC: 1.20 + // + // Cycle bound: 5.0 + // IPC bound: 1.20 + // + // Wall time: 0.02s + // User time: 0.02s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + ldr r6, [r0, #4] // *............................. + add r6, r2, r6 // .*............................ + eor.w r3, r6, r3 // ..*........................... 
+ smlabt r12, r2, r2, r3 // ..*........................... + asrs r3, r12, #1 // ....*......................... + str r3, [r0, #4] // ....*......................... // ------ cycle (expected) ------> // 0 25 diff --git a/examples/opt/armv7m/loop_vmov_cmp_opt_m7.s b/examples/opt/armv7m/loop_vmov_cmp_opt_m7.s new file mode 100644 index 00000000..c75f8b9b --- /dev/null +++ b/examples/opt/armv7m/loop_vmov_cmp_opt_m7.s @@ -0,0 +1,31 @@ +/* For example, r5 represents an address where we will stop iterating and r6 is +the actual pointer which is incremented inside the loop. */ + +mov.w r6, #0 +add.w r5, r6, #64 +vmov s0, r5 + +start: + // Instructions: 1 + // Expected cycles: 1 + // Expected IPC: 1.00 + // + // Cycle bound: 1.0 + // IPC bound: 1.00 + // + // Wall time: 0.02s + // User time: 0.02s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + add r6, r6, #4 // *............................. + + // ------ cycle (expected) ------> + // 0 25 + // |------------------------|----- + // add r6, r6, #4 // *.............................. + + vmov r5, s0 + cmp r6, r5 + bne start \ No newline at end of file From 98e876cfb567a104765e54d1285ac7a1773f32ad Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Mon, 2 Dec 2024 13:53:46 +0100 Subject: [PATCH 16/16] Armv7m: Add comments explaining loop types --- slothy/targets/arm_v7m/arch_v7m.py | 49 ++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 1ce30e4f..fd0dc73b 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -123,6 +123,28 @@ def unconditional(lbl): class VmovCmpLoop(Loop): + """ + Loop ending in a vmov, a compare, and a branch. + + The modification to the value we compare against happens inside the loop + body. 
The value that is being compared to is stashed to a floating point + register before the loop starts and therefore needs to be recovered before + the comparison. + + WARNING: This type of loop is experimental as slothy has no knowledge about + what happens inside the loop boundary! Especially, a register is written + inside the boundary which may be used for renaming by slothy. Use with + caution. + + Example: + ``` + loop_lbl: + {code} + vmov <end>, <endf> + cmp <cnt>, <end> + (cbnz|bnz|bne) loop_lbl + ``` where cnt is the loop counter in lr. + """ def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: super().__init__(lbl_start=lbl_start, lbl_end=lbl_end, loop_init=loop_init) self.lbl = lbl @@ -193,6 +215,21 @@ def end(self, other, indentation=0): yield f'{indent}bne {lbl_start}' class CmpLoop(Loop): + """ + Loop ending in a compare and a branch. + The modification to the value we compare against happens inside the loop body. + WARNING: This type of loop is experimental as slothy has no knowledge about + what happens inside the loop boundary! Use with caution. + + Example: + ``` + loop_lbl: + {code} + cmp <cnt>, <end> + (cbnz|bnz|bne) loop_lbl + ``` + where cnt is the loop counter in lr. + """ def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: super().__init__(lbl_start=lbl_start, lbl_end=lbl_end, loop_init=loop_init) self.lbl_regex = r"^\s*(?P