Skip to content

Commit

Permalink
add example for .equ
Browse files Browse the repository at this point in the history
  • Loading branch information
mkannwischer committed Nov 6, 2024
1 parent 535fac2 commit bc3b4aa
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 0 deletions.
19 changes: 19 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,24 @@ def core(self,slothy):
slothy.config.constraints.stalls_first_attempt=32
slothy.optimize()

class AArch64Example0Equ(Example):
    """Example built on the `aarch64_simple0_equ` input file.

    The input/output naming follows the sibling examples: an optional
    variant suffix is appended to both the input file name and the example
    name, and the target label is appended to the example name only.
    """

    def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
        """Derive the input file and example names, then defer to `Example`.

        var:    optional variant tag; when non-empty, `_<var>` is appended
                to both the input file name and the example name.
        arch:   architecture model forwarded to `Example`.
        target: target model forwarded to `Example`; its label (looked up
                in `target_label_dict`) is appended to the example name.
        """
        variant_suffix = f"_{var}" if var != "" else ""
        infile = "aarch64_simple0_equ" + variant_suffix
        # Only the example name carries the target label, not the input file.
        name = infile + f"_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target)

    def core(self, slothy):
        """Configure `slothy` and optimize the code between `start` and `end`."""
        slothy.config.variable_size = True
        slothy.config.constraints.stalls_first_attempt = 32
        # The labels below are the ones delimiting the code in the input file.
        slothy.optimize(start="start", end="end")


class AArch64Example1(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
name = "aarch64_simple0_macros"
Expand Down Expand Up @@ -1370,6 +1388,7 @@ def main():

AArch64Example0(),
AArch64Example0(target=Target_CortexA72),
AArch64Example0Equ(),
AArch64Example1(),
AArch64Example1(target=Target_CortexA72),
AArch64Example2(),
Expand Down
28 changes: 28 additions & 0 deletions examples/naive/aarch64/aarch64_simple0_equ.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Example input that uses a symbolic constant defined with .equ inside
// address immediates. The code between the `start` and `end` labels is the
// region that example.py hands to slothy.optimize(start="start", end="end").

// dist = 16 bytes, i.e. the size of one 128-bit q register. It is used below
// as the stride between the four consecutive vectors accessed through x0.
.equ dist, 16

start:
// Load the two constant vectors. Only lanes v0.h[0], v0.h[1] and v1.h[0]
// are read by the arithmetic below.
ldr q0, [x1, #0]
ldr q1, [x2, #0]

// Load four consecutive 128-bit vectors from x0, x0+dist, x0+2*dist, x0+3*dist.
ldr q8, [x0]
ldr q9, [x0, #1*dist]
ldr q10, [x0, #2*dist]
ldr q11, [x0, #3*dist]

// First pair (v8, v9):
//   v24 = v9 * v0.h[0]                  (lane-wise, low halves)
//   v9  = sqrdmulh(v9, v0.h[1])         (rounding doubling multiply, high half)
//   v24 = v24 - v9 * v1.h[0]
//   v9  = v8 - v24 ; v8 = v8 + v24
// NOTE(review): this looks like a modular multiplication by a constant
// followed by an add/sub butterfly (as used in NTTs) -- confirm with author.
mul v24.8h, v9.8h, v0.h[0]
sqrdmulh v9.8h, v9.8h, v0.h[1]
mls v24.8h, v9.8h, v1.h[0]
sub v9.8h, v8.8h, v24.8h
add v8.8h, v8.8h, v24.8h

// Second pair (v10, v11): same sequence with v11 as the multiplied operand
// and v10 as the add/sub operand. v24 is reused as the temporary.
mul v24.8h, v11.8h, v0.h[0]
sqrdmulh v11.8h, v11.8h, v0.h[1]
mls v24.8h, v11.8h, v1.h[0]
sub v11.8h, v10.8h, v24.8h
add v10.8h, v10.8h, v24.8h

// Store the results back. The first store post-increments x0 by 4*dist, so
// the remaining three stores use negative offsets from the advanced pointer
// to reach the original x0+dist, x0+2*dist and x0+3*dist.
str q8, [x0], #4*dist
str q9, [x0, #-3*dist]
str q10, [x0, #-2*dist]
str q11, [x0, #-1*dist]
end:
62 changes: 62 additions & 0 deletions examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Reordered and register-renamed version of the code in
// examples/naive/aarch64/aarch64_simple0_equ.s.
// NOTE(review): the statistics header and cycle diagrams below appear to be
// emitted by the optimizer; presumably this file is regenerated rather than
// edited by hand -- confirm before modifying.
// In the instructions below, the `dist` symbol has been replaced by its
// numeric value (16) in all address immediates.
.equ dist, 16

start:
// Instructions: 20
// Expected cycles: 28
// Expected IPC: 0.71
//
// Cycle bound: 28.0
// IPC bound: 0.71
//
// Wall time: 0.25s
// User time: 0.25s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr q0, [x1, #0] // *.............................
ldr q7, [x0, #16] // ..*...........................
ldr q13, [x2, #0] // ....*.........................
ldr q24, [x0, #48] // ......*.......................
mul v30.8H, v7.8H, v0.H[0] // ........*.....................
sqrdmulh v14.8H, v7.8H, v0.H[1] // .........*....................
sqrdmulh v27.8H, v24.8H, v0.H[1] // ..........*...................
mul v20.8H, v24.8H, v0.H[0] // ...........*..................
ldr q17, [x0] // ............*.................
mls v30.8H, v14.8H, v13.H[0] // ..............*...............
mls v20.8H, v27.8H, v13.H[0] // ...............*..............
ldr q13, [x0, #32] // ................*.............
sub v10.8H, v17.8H, v30.8H // ..................*...........
add v27.8H, v17.8H, v30.8H // ...................*..........
sub v0.8H, v13.8H, v20.8H // ....................*.........
str q10, [x0, #16] // .....................*........
add v8.8H, v13.8H, v20.8H // ......................*.......
str q0, [x0, #48] // .......................*......
str q27, [x0], #4*16 // .........................*....
str q8, [x0, #-32] // ...........................*..

// The commented-out listing below reproduces the input instructions in their
// original order; the `*` in each row marks the cycle column at which that
// instruction appears in the reordered code above.
// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr q0, [x1, #0] // *..............................
// ldr q1, [x2, #0] // ....*..........................
// ldr q8, [x0] // ............*..................
// ldr q9, [x0, #1*16] // ..*............................
// ldr q10, [x0, #2*16] // ................*..............
// ldr q11, [x0, #3*16] // ......*........................
// mul v24.8h, v9.8h, v0.h[0] // ........*......................
// sqrdmulh v9.8h, v9.8h, v0.h[1] // .........*.....................
// mls v24.8h, v9.8h, v1.h[0] // ..............*................
// sub v9.8h, v8.8h, v24.8h // ..................*............
// add v8.8h, v8.8h, v24.8h // ...................*...........
// mul v24.8h, v11.8h, v0.h[0] // ...........*...................
// sqrdmulh v11.8h, v11.8h, v0.h[1] // ..........*....................
// mls v24.8h, v11.8h, v1.h[0] // ...............*...............
// sub v11.8h, v10.8h, v24.8h // ....................*..........
// add v10.8h, v10.8h, v24.8h // ......................*........
// str q8, [x0], #4*16 // .........................*.....
// str q9, [x0, #-3*16] // .....................*.........
// str q10, [x0, #-2*16] // ...........................*...
// str q11, [x0, #-1*16] // .......................*.......

end:

0 comments on commit bc3b4aa

Please sign in to comment.