From 68031a91ef5527b85dde86e0488db7bc8d8ad936 Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Wed, 27 Nov 2024 11:32:25 +0100 Subject: [PATCH 1/5] Add Armv7m Arch and CM7 uArch models --- example.py | 58 + examples/naive/armv7m/armv7m_simple0.s | 9 + examples/naive/armv7m/loop_cmp.s | 10 + examples/naive/armv7m/loop_subs.s | 4 + examples/opt/armv7m/armv7m_simple0_opt_m7.s | 33 + examples/opt/armv7m/loop_cmp_opt_m7.s | 29 + examples/opt/armv7m/loop_subs_opt_m7.s | 11 + slothy/targets/arm_v7m/arch_v7m.py | 1882 +++++++++++++++++++ slothy/targets/arm_v7m/cortex_m4.py | 75 + slothy/targets/arm_v7m/cortex_m7.py | 353 ++++ slothy/targets/query.py | 7 +- 11 files changed, 2470 insertions(+), 1 deletion(-) create mode 100644 examples/naive/armv7m/armv7m_simple0.s create mode 100644 examples/naive/armv7m/loop_cmp.s create mode 100644 examples/naive/armv7m/loop_subs.s create mode 100644 examples/opt/armv7m/armv7m_simple0_opt_m7.s create mode 100644 examples/opt/armv7m/loop_cmp_opt_m7.s create mode 100644 examples/opt/armv7m/loop_subs_opt_m7.s create mode 100644 slothy/targets/arm_v7m/arch_v7m.py create mode 100644 slothy/targets/arm_v7m/cortex_m4.py create mode 100644 slothy/targets/arm_v7m/cortex_m7.py diff --git a/example.py b/example.py index 89a295d6..6e38f856 100644 --- a/example.py +++ b/example.py @@ -31,7 +31,9 @@ from slothy import Slothy, Config +import slothy.targets.arm_v7m.arch_v7m as Arch_Armv7M import slothy.targets.arm_v81m.arch_v81m as Arch_Armv81M +import slothy.targets.arm_v7m.cortex_m7 as Target_CortexM7 import slothy.targets.arm_v81m.cortex_m55r1 as Target_CortexM55r1 import slothy.targets.arm_v81m.cortex_m85r1 as Target_CortexM85r1 @@ -43,6 +45,7 @@ target_label_dict = {Target_CortexA55: "a55", Target_CortexA72: "a72", + Target_CortexM7: "m7", Target_CortexM55r1: "m55", Target_CortexM85r1: "m85", Target_AppleM1_firestorm: "m1_firestorm", @@ -76,6 +79,8 @@ def __init__(self, infile, name=None, funcname=None, suffix="opt", subfolder = "" if self.arch == AArch64_Neon: subfolder = "aarch64/" + elif self.arch == Arch_Armv7M: + subfolder = "armv7m/" self.infile_full = f"examples/naive/{subfolder}{self.infile}.s" self.outfile_full = f"examples/opt/{subfolder}{self.outfile}.s" self.name = name @@ -634,7 +639,55 @@ def core(self,slothy): slothy.config.sw_pipelining.optimize_postamble = False slothy.optimize_loop("start") +class Armv7mExample0(Example): + def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): + name = "armv7m_simple0" + infile = name + + if var != "": + name += f"_{var}" + infile += f"_{var}" + name += f"_{target_label_dict[target]}" + + super().__init__(infile, name, rename=True, arch=arch, target=target) + + def core(self,slothy): + slothy.config.variable_size=True + slothy.config.inputs_are_outputs = True + slothy.optimize(start="start", end="end") + +class Armv7mLoopSubs(Example): + def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): + name = "loop_subs" + infile = name + if var != "": + name += f"_{var}" + infile += f"_{var}" + name += f"_{target_label_dict[target]}" + + super().__init__(infile, name, rename=True, arch=arch, target=target) + + def core(self,slothy): + slothy.config.variable_size=True + slothy.optimize_loop("start") + +class Armv7mLoopCmp(Example): + def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7): + name = "loop_cmp" + infile = name + + if var != "": + name += f"_{var}" + infile += f"_{var}" + name += f"_{target_label_dict[target]}" + + super().__init__(infile, name, rename=True, arch=arch, target=target) + + def core(self,slothy): + slothy.config.variable_size=True + slothy.config.outputs = ["r6"] + slothy.optimize_loop("start") class ntt_kyber_123_4567(Example): def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None): @@ -1425,9 +1478,14 @@ def main(): AArch64Example2(), AArch64Example2(target=Target_CortexA72), + # Armv7m examples + Armv7mExample0(), + # Loop examples AArch64LoopSubs(), LoopLe(), + Armv7mLoopSubs(), + Armv7mLoopCmp(), CRT(), diff --git a/examples/naive/armv7m/armv7m_simple0.s b/examples/naive/armv7m/armv7m_simple0.s new file mode 100644 index 00000000..1b3e77c4 --- /dev/null +++ b/examples/naive/armv7m/armv7m_simple0.s @@ -0,0 +1,9 @@ + +start: +ldr r1, [r0, #4] +add r1, r2, r1 +eor.w r1, r1, r3 +smlabt r3, r2, r2, r1 +asrs r3, r3, #1 +str r3, [r0, #4] +end: \ No newline at end of file diff --git a/examples/naive/armv7m/loop_cmp.s b/examples/naive/armv7m/loop_cmp.s new file mode 100644 index 00000000..be8d09a7 --- /dev/null +++ b/examples/naive/armv7m/loop_cmp.s @@ -0,0 +1,10 @@ +/* For example, r5 represents an address where we will stop iterating and r6 is +the actual pointer which is incremented inside the loop. */ + +mov.w r6, #0 +add.w r5, r6, #64 + +start: + add r6, r6, #4 + cmp.w r6, r5 + bne.w start \ No newline at end of file diff --git a/examples/naive/armv7m/loop_subs.s b/examples/naive/armv7m/loop_subs.s new file mode 100644 index 00000000..5dd05c9c --- /dev/null +++ b/examples/naive/armv7m/loop_subs.s @@ -0,0 +1,4 @@ +movw r5, #16 +start: + subs.w r5, #1 + bne.w start \ No newline at end of file diff --git a/examples/opt/armv7m/armv7m_simple0_opt_m7.s b/examples/opt/armv7m/armv7m_simple0_opt_m7.s new file mode 100644 index 00000000..87fd5e61 --- /dev/null +++ b/examples/opt/armv7m/armv7m_simple0_opt_m7.s @@ -0,0 +1,33 @@ + + start: + // Instructions: 6 + // Expected cycles: 5 + // Expected IPC: 1.20 + // + // Cycle bound: 5.0 + // IPC bound: 1.20 + // + // Wall time: 0.01s + // User time: 0.01s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + ldr r7, [r0, #4] // *............................. + add r7, r2, r7 // .*............................ + eor.w r12, r7, r3 // ..*........................... + smlabt r12, r2, r2, r12 // ..*........................... + asrs r3, r12, #1 // ....*......................... + str r3, [r0, #4] // ....*......................... + + // ------ cycle (expected) ------> + // 0 25 + // |------------------------|----- + // ldr r1, [r0, #4] // *.............................. + // add r1, r2, r1 // .*............................. + // eor.w r1, r1, r3 // ..*............................ + // smlabt r3, r2, r2, r1 // ..*............................ + // asrs r3, r3, #1 // ....*.......................... + // str r3, [r0, #4] // ....*.......................... + + end: diff --git a/examples/opt/armv7m/loop_cmp_opt_m7.s b/examples/opt/armv7m/loop_cmp_opt_m7.s new file mode 100644 index 00000000..4524a7e2 --- /dev/null +++ b/examples/opt/armv7m/loop_cmp_opt_m7.s @@ -0,0 +1,29 @@ +/* For example, r5 represents an address where we will stop iterating and r6 is +the actual pointer which is incremented inside the loop. */ + +mov.w r6, #0 +add.w r5, r6, #64 + +1: + // Instructions: 1 + // Expected cycles: 1 + // Expected IPC: 1.00 + // + // Cycle bound: 1.0 + // IPC bound: 1.00 + // + // Wall time: 0.02s + // User time: 0.02s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + add r6, r6, #4 // *............................. + + // ------ cycle (expected) ------> + // 0 25 + // |------------------------|----- + // add r6, r6, #4 // *.............................. + + cmp r6, r5 + bne 1b \ No newline at end of file diff --git a/examples/opt/armv7m/loop_subs_opt_m7.s b/examples/opt/armv7m/loop_subs_opt_m7.s new file mode 100644 index 00000000..f1bcc451 --- /dev/null +++ b/examples/opt/armv7m/loop_subs_opt_m7.s @@ -0,0 +1,11 @@ +movw r5, #16 +start: + // Instructions: 0 + // Expected cycles: 0 + // Expected IPC: 0.00 + // + // Wall time: 0.00s + // User time: 0.00s + // + subs r5, #1 + bne start \ No newline at end of file diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py new file mode 100644 index 00000000..43154983 --- /dev/null +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -0,0 +1,1882 @@ +import logging +import inspect +import re +import math +from enum import Enum +from functools import cache + +from slothy.helper import SourceLine, Loop +from sympy import simplify + +llvm_mca_arch = "arm" # TODO + + +class RegisterType(Enum): + GPR = 1 + FPR = 2 + STACK_FPR = 3 + STACK_GPR = 4 + FLAGS = 5 + HINT = 6 + + def __str__(self): + return self.name + def __repr__(self): + return self.name + + @cache + @staticmethod + def spillable(reg_type): + return reg_type in [RegisterType.GPR] + + @cache + @staticmethod + def list_registers(reg_type, only_extra=False, only_normal=False, with_variants=False): + """Return the list of all registers of a given type""" + + stack_locations = [ f"STACK{i}" for i in range(8) ] + fpstack_locations = [ f"STACK{i}" for i in range(8) ] + + gprs_normal = [ f"r{i}" for i in range(15) ] + fprs_normal = [ f"s{i}" for i in range(31) ] + + gprs_extra = [] + fprs_extra = [] + + gprs = [] + fprs = [] + # TODO: What are hints? + hints = [ f"t{i}" for i in range(100) ] + \ + [ f"t{i}{j}" for i in range(8) for j in range(8) ] + \ + [ f"t{i}_{j}" for i in range(16) for j in range(16) ] + + flags = ["flags"] + if not only_extra: + gprs += gprs_normal + fprs += fprs_normal + if not only_normal: + gprs += gprs_extra + fprs += fprs_extra + + return { RegisterType.GPR : gprs, + RegisterType.STACK_GPR : stack_locations, + RegisterType.FPR : fprs, + RegisterType.STACK_FPR : fpstack_locations, + RegisterType.HINT : hints, + RegisterType.FLAGS : flags}[reg_type] + + @staticmethod + def find_type(r): + """Find type of architectural register""" + + if r.startswith("hint_"): + return RegisterType.HINT + + for ty in RegisterType: + if r in RegisterType.list_registers(ty): + return ty + + return None + + @staticmethod + def is_renamed(ty): + """Indicate if register type should be subject to renaming""" + if ty == RegisterType.HINT: + return False + return True + + @staticmethod + def from_string(string): + """Find registe type from string""" + string = string.lower() + return { "fprstack" : RegisterType.STACK_FPR, + "stack" : RegisterType.STACK_GPR, + "fpr" : RegisterType.FPR, + "gpr" : RegisterType.GPR, + "hint" : RegisterType.HINT, + "flags" : RegisterType.FLAGS}.get(string,None) + + @staticmethod + def default_reserved(): + """Return the list of registers that should be reserved by default""" + # r13 is the stack pointer + return set(["flags", "r13"] + RegisterType.list_registers(RegisterType.HINT)) + + @staticmethod + def default_aliases(): + "Register aliases used by the architecture" + return { + "lr": "r14", + } + +# TODO: Comparison can also be done with {add,sub,...}s +class Branch: + """Helper for emitting branches""" + + @staticmethod + def if_equal(cnt, val, lbl): + """Emit assembly for a branch-if-equal sequence""" + yield f"cmp {cnt}, #{val}" + yield f"beq {lbl}" + + @staticmethod + def if_greater_equal(cnt, val, lbl): + """Emit assembly for a branch-if-greater-equal sequence""" + yield f"cmp {cnt}, #{val}" + yield f"bge {lbl}" + + @staticmethod + def unconditional(lbl): + """Emit unconditional branch""" + yield f"b {lbl}" + + +class VmovCmpLoop(Loop): + def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None: + super().__init__(lbl_start=lbl_start, lbl_end=lbl_end, loop_init=loop_init) + self.lbl = lbl + self.lbl_regex = r"^\s*(?P