From bc3b4aa447574546cabd4913f748ceb84204f36d Mon Sep 17 00:00:00 2001
From: "Matthias J. Kannwischer"
Date: Wed, 6 Nov 2024 15:24:46 +0800
Subject: [PATCH] add example for .equ

---
Reviewer notes (free-form area, not part of the commit message):

* example.py gains AArch64Example0Equ, which is also added to the example
  list in main(). Its core() sets variable_size=True and
  stalls_first_attempt=32, then optimizes only the region between the
  `start` and `end` labels of aarch64_simple0_equ.s.
* The naive input defines `.equ dist, 16` and uses `dist` in its load/store
  offsets. In the checked-in optimized output the same offsets appear as
  literals (`#1*dist` is shown as `#1*16` in the original-order listing and
  as `#16` in the scheduled code).
* Stores that were moved ahead of the post-incrementing
  `str ..., [x0], #4*16` use positive offsets (#16, #48) in the optimized
  output; the one that stays behind it keeps a negative offset (#-32).
* The naive .s file is added without a trailing newline (see the
  "\ No newline at end of file" marker below).
* NOTE(review): the whitespace of this patch was re-flowed from a collapsed
  copy; indentation and comment alignment are reconstructed. Confirm with
  `git apply --check` before relying on it.

 example.py                                    | 19 ++++++
 examples/naive/aarch64/aarch64_simple0_equ.s  | 28 +++++++++
 .../opt/aarch64/aarch64_simple0_equ_opt_a55.s | 62 +++++++++++++++++++
 3 files changed, 109 insertions(+)
 create mode 100644 examples/naive/aarch64/aarch64_simple0_equ.s
 create mode 100644 examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s

diff --git a/example.py b/example.py
index c5d1e06a..aa3ea54b 100644
--- a/example.py
+++ b/example.py
@@ -547,6 +547,24 @@ def core(self,slothy):
         slothy.config.constraints.stalls_first_attempt=32
         slothy.optimize()
 
+class AArch64Example0Equ(Example):
+    def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
+        name = "aarch64_simple0_equ"
+        infile = name
+
+        if var != "":
+            name += f"_{var}"
+            infile += f"_{var}"
+        name += f"_{target_label_dict[target]}"
+
+        super().__init__(infile, name, rename=True, arch=arch, target=target)
+
+    def core(self,slothy):
+        slothy.config.variable_size=True
+        slothy.config.constraints.stalls_first_attempt=32
+        slothy.optimize(start="start", end="end")
+
+
 class AArch64Example1(Example):
     def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
         name = "aarch64_simple0_macros"
@@ -1370,6 +1388,7 @@ def main():
 
                  AArch64Example0(),
                  AArch64Example0(target=Target_CortexA72),
+                 AArch64Example0Equ(),
                  AArch64Example1(),
                  AArch64Example1(target=Target_CortexA72),
                  AArch64Example2(),
diff --git a/examples/naive/aarch64/aarch64_simple0_equ.s b/examples/naive/aarch64/aarch64_simple0_equ.s
new file mode 100644
index 00000000..3ebb1167
--- /dev/null
+++ b/examples/naive/aarch64/aarch64_simple0_equ.s
@@ -0,0 +1,28 @@
+        .equ dist, 16
+
+start:
+ldr q0, [x1, #0]
+ldr q1, [x2, #0]
+
+ldr q8, [x0]
+ldr q9, [x0, #1*dist]
+ldr q10, [x0, #2*dist]
+ldr q11, [x0, #3*dist]
+
+mul v24.8h, v9.8h, v0.h[0]
+sqrdmulh v9.8h, v9.8h, v0.h[1]
+mls v24.8h, v9.8h, v1.h[0]
+sub v9.8h, v8.8h, v24.8h
+add v8.8h, v8.8h, v24.8h
+
+mul v24.8h, v11.8h, v0.h[0]
+sqrdmulh v11.8h, v11.8h, v0.h[1]
+mls v24.8h, v11.8h, v1.h[0]
+sub v11.8h, v10.8h, v24.8h
+add v10.8h, v10.8h, v24.8h
+
+str q8, [x0], #4*dist
+str q9, [x0, #-3*dist]
+str q10, [x0, #-2*dist]
+str q11, [x0, #-1*dist]
+end:
\ No newline at end of file
diff --git a/examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s b/examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s
new file mode 100644
index 00000000..09381cbb
--- /dev/null
+++ b/examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s
@@ -0,0 +1,62 @@
+        .equ dist, 16
+
+        start:
+                                          // Instructions:    20
+                                          // Expected cycles: 28
+                                          // Expected IPC:    0.71
+                                          //
+                                          // Cycle bound:     28.0
+                                          // IPC bound:       0.71
+                                          //
+                                          // Wall time:     0.25s
+                                          // User time:     0.25s
+                                          //
+                                          // ----- cycle (expected) ------>
+                                          // 0                        25
+                                          // |------------------------|----
+        ldr q0, [x1, #0]                  // *.............................
+        ldr q7, [x0, #16]                 // ..*...........................
+        ldr q13, [x2, #0]                 // ....*.........................
+        ldr q24, [x0, #48]                // ......*.......................
+        mul v30.8H, v7.8H, v0.H[0]        // ........*.....................
+        sqrdmulh v14.8H, v7.8H, v0.H[1]   // .........*....................
+        sqrdmulh v27.8H, v24.8H, v0.H[1]  // ..........*...................
+        mul v20.8H, v24.8H, v0.H[0]       // ...........*..................
+        ldr q17, [x0]                     // ............*.................
+        mls v30.8H, v14.8H, v13.H[0]      // ..............*...............
+        mls v20.8H, v27.8H, v13.H[0]      // ...............*..............
+        ldr q13, [x0, #32]                // ................*.............
+        sub v10.8H, v17.8H, v30.8H        // ..................*...........
+        add v27.8H, v17.8H, v30.8H        // ...................*..........
+        sub v0.8H, v13.8H, v20.8H         // ....................*.........
+        str q10, [x0, #16]                // .....................*........
+        add v8.8H, v13.8H, v20.8H         // ......................*.......
+        str q0, [x0, #48]                 // .......................*......
+        str q27, [x0], #4*16              // .........................*....
+        str q8, [x0, #-32]                // ...........................*..
+
+                                             // ------ cycle (expected) ------>
+                                             // 0                        25
+                                             // |------------------------|-----
+        // ldr q0, [x1, #0]                  // *..............................
+        // ldr q1, [x2, #0]                  // ....*..........................
+        // ldr q8, [x0]                      // ............*..................
+        // ldr q9, [x0, #1*16]               // ..*............................
+        // ldr q10, [x0, #2*16]              // ................*..............
+        // ldr q11, [x0, #3*16]              // ......*........................
+        // mul v24.8h, v9.8h, v0.h[0]        // ........*......................
+        // sqrdmulh v9.8h, v9.8h, v0.h[1]    // .........*.....................
+        // mls v24.8h, v9.8h, v1.h[0]        // ..............*................
+        // sub v9.8h, v8.8h, v24.8h          // ..................*............
+        // add v8.8h, v8.8h, v24.8h          // ...................*...........
+        // mul v24.8h, v11.8h, v0.h[0]       // ...........*...................
+        // sqrdmulh v11.8h, v11.8h, v0.h[1]  // ..........*....................
+        // mls v24.8h, v11.8h, v1.h[0]       // ...............*...............
+        // sub v11.8h, v10.8h, v24.8h        // ....................*..........
+        // add v10.8h, v10.8h, v24.8h        // ......................*........
+        // str q8, [x0], #4*16               // .........................*.....
+        // str q9, [x0, #-3*16]              // .....................*.........
+        // str q10, [x0, #-2*16]             // ...........................*...
+        // str q11, [x0, #-1*16]             // .......................*.......
+
+        end: