Skip to content

Commit

Permalink
Add AArch64 example for assembly conditional
Browse files Browse the repository at this point in the history
  • Loading branch information
mkannwischer committed Dec 4, 2024
1 parent 7f5b3c1 commit 4e374ad
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 0 deletions.
16 changes: 16 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,21 @@ def core(self,slothy):
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")

class AArch64IfElse(Example):
    """Example wrapper for the ``aarch64_ifelse`` assembly source.

    The input file name is ``aarch64_ifelse`` plus an optional ``_<var>``
    suffix; the example name is the input file name followed by the label
    looked up for ``target`` in ``target_label_dict``.
    """

    def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
        """Derive the input file and example names and forward them to the base class.

        Args:
            var: Optional variant tag; when it is not the empty string,
                ``_<var>`` is appended to both the input file name and the
                example name.
            arch: Architecture model passed through to ``Example``.
            target: Target model passed through to ``Example``; also used as
                the key into ``target_label_dict`` for the name suffix.
        """
        # The variant suffix is shared by the input file name and the example name.
        variant_suffix = f"_{var}" if var != "" else ""
        source_file = "aarch64_ifelse" + variant_suffix
        example_name = f"{source_file}_{target_label_dict[target]}"

        super().__init__(
            source_file,
            example_name,
            rename=True,
            arch=arch,
            target=target,
        )

    def core(self, slothy):
        """Run ``slothy.optimize()`` with no arguments and no extra configuration."""
        slothy.optimize()

class ntt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
name = "ntt_kyber_123_4567"
Expand Down Expand Up @@ -1494,6 +1509,7 @@ def main():
AArch64Example1(target=Target_CortexA72),
AArch64Example2(),
AArch64Example2(target=Target_CortexA72),
AArch64IfElse(),

# Armv7m examples
Armv7mExample0(),
Expand Down
30 changes: 30 additions & 0 deletions examples/naive/aarch64/aarch64_ifelse.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Straight-line AArch64 Neon example exercising nested assembler
// conditionals (.if/.else/.endif) whose conditions are constants.
//
// Registers read on entry: x0 (base of four consecutive 16-byte vectors,
// updated in place), x1 and x2 (each the address of one 16-byte vector).
// NOTE(review): q0/q1 presumably hold multiplication constants -- confirm.
ldr q0, [x1, #0]
ldr q1, [x2, #0]

// Load the four vectors at x0 + 0, 16, 32, 48.
ldr q8, [x0]
ldr q9, [x0, #1*16]
ldr q10, [x0, #2*16]
ldr q11, [x0, #3*16]
// 5 != 0 is constant-true: this branch is assembled and the matching
// outer .else branch is dropped.
.if 5 != 0
// v24 = v9 * v0.h[0] - sqrdmulh(v9, v0.h[1]) * v1.h[0],
// then (v8, v9) <- (v8 + v24, v8 - v24).
// NOTE(review): presumably a modular multiply followed by an add/sub
// butterfly -- confirm against the intended algorithm.
mul v24.8h, v9.8h, v0.h[0]
sqrdmulh v9.8h, v9.8h, v0.h[1]
mls v24.8h, v9.8h, v1.h[0]
sub v9.8h, v8.8h, v24.8h
add v8.8h, v8.8h, v24.8h

// 5 > 2 is also constant-true: the same sequence is applied to
// (v10, v11); the inner .else branch is dropped.
.if 5 > 2
mul v24.8h, v11.8h, v0.h[0]
sqrdmulh v11.8h, v11.8h, v0.h[1]
mls v24.8h, v11.8h, v1.h[0]
sub v11.8h, v10.8h, v24.8h
add v10.8h, v10.8h, v24.8h
.else
// Never assembled (inner condition is constant-true).
add v10.8h, v10.8h, v11.8h
.endif
.else
// Never assembled (outer condition is constant-true).
// NOTE(review): r0/r1 are not AArch64 register names; this line only
// goes unnoticed because the branch is discarded -- confirm it is an
// intentional placeholder.
add r0, r1
.endif
// Post-index store: writes q8 at x0, then advances x0 by 4*16 bytes.
// The remaining stores therefore use negative offsets from the advanced
// pointer to reach the slots at original x0 + 16, 32, 48.
str q8, [x0], #4*16
str q9, [x0, #-3*16]
str q10, [x0, #-2*16]
str q11, [x0, #-1*16]
54 changes: 54 additions & 0 deletions examples/opt/aarch64/aarch64_ifelse_opt_a55.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Instructions: 20
// Expected cycles: 28
// Expected IPC: 0.71
//
// Wall time: 0.25s
// User time: 0.25s
//
// Reading notes:
// - Expected IPC is Instructions / Expected cycles (20 / 28, about 0.71).
// - In the diagram below, the `*` on each row marks the expected cycle of
//   the instruction on that row.
// - Only 20 instructions appear: the `.else` branches of the input's
//   conditionals are absent from this listing.
// - Register names differ from the input listing reproduced in comments
//   further down (e.g. the input's q8 is loaded here into q2).
// - The stores to [x0, #48] and [x0, #32] are placed before the
//   post-incrementing `str q12, [x0], #4*16`, so their offsets are relative
//   to the un-incremented x0 (input: #-1*16 and #-2*16 after the increment).
//   The store placed after it keeps a negative offset (#-48 == #-3*16).
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr q2, [x0, #48] // *.............................
ldr q4, [x1, #0] // ..*...........................
ldr q13, [x0, #16] // ....*.........................
mul v27.8H, v2.8H, v4.H[0] // ......*.......................
sqrdmulh v6.8H, v2.8H, v4.H[1] // .......*......................
ldr q3, [x2, #0] // ........*.....................
mul v24.8H, v13.8H, v4.H[0] // ..........*...................
ldr q28, [x0, #32] // ...........*..................
mls v27.8H, v6.8H, v3.H[0] // .............*................
sqrdmulh v14.8H, v13.8H, v4.H[1] // ..............*...............
ldr q2, [x0] // ...............*..............
sub v18.8H, v28.8H, v27.8H // .................*............
mls v24.8H, v14.8H, v3.H[0] // ..................*...........
add v9.8H, v28.8H, v27.8H // ....................*.........
str q18, [x0, #48] // .....................*........
add v12.8H, v2.8H, v24.8H // ......................*.......
str q9, [x0, #32] // .......................*......
sub v3.8H, v2.8H, v24.8H // ........................*.....
str q12, [x0], #4*16 // .........................*....
str q3, [x0, #-48] // ...........................*..

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr q0, [x1, #0] // ..*............................
// ldr q1, [x2, #0] // ........*......................
// ldr q8, [x0] // ...............*...............
// ldr q9, [x0, #1*16] // ....*..........................
// ldr q10, [x0, #2*16] // ...........*...................
// ldr q11, [x0, #3*16] // *..............................
// mul v24.8h, v9.8h, v0.h[0] // ..........*....................
// sqrdmulh v9.8h, v9.8h, v0.h[1] // ..............*................
// mls v24.8h, v9.8h, v1.h[0] // ..................*............
// sub v9.8h, v8.8h, v24.8h // ........................*......
// add v8.8h, v8.8h, v24.8h // ......................*........
// mul v24.8h, v11.8h, v0.h[0] // ......*........................
// sqrdmulh v11.8h, v11.8h, v0.h[1] // .......*.......................
// mls v24.8h, v11.8h, v1.h[0] // .............*.................
// sub v11.8h, v10.8h, v24.8h // .................*.............
// add v10.8h, v10.8h, v24.8h // ....................*..........
// str q8, [x0], #4*16 // .........................*.....
// str q9, [x0, #-3*16] // ...........................*...
// str q10, [x0, #-2*16] // .......................*.......
// str q11, [x0, #-1*16] // .....................*.........

0 comments on commit 4e374ad

Please sign in to comment.