Skip to content

Commit

Permalink
Add support for .equ in asm (#98)
Browse files Browse the repository at this point in the history
* Add support for .equ in asm

b5f0aa9
3c00fc8
c39ac3c

* remove redundant .equ's

* add example for .equ
  • Loading branch information
mkannwischer authored Nov 8, 2024
1 parent c077093 commit 676a967
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 12 deletions.
19 changes: 19 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,24 @@ def core(self,slothy):
slothy.config.constraints.stalls_first_attempt=32
slothy.optimize()

class AArch64Example0Equ(Example):
    """Example driver for the ``aarch64_simple0_equ`` assembly kernel.

    The input file name is the base name plus an optional ``_<var>`` suffix;
    the example name additionally carries the label of the chosen target.
    """

    def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
        base = "aarch64_simple0_equ"
        # An empty variant contributes no suffix to either name.
        suffix = f"_{var}" if var != "" else ""

        infile = base + suffix
        name = f"{base}{suffix}_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target)

    def core(self, slothy):
        """Configure ``slothy`` and optimize the code between ``start`` and ``end``."""
        config = slothy.config
        config.variable_size = True
        config.constraints.stalls_first_attempt = 32
        # Only the region delimited by these two labels is optimized.
        slothy.optimize(start="start", end="end")


class AArch64Example1(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
name = "aarch64_simple0_macros"
Expand Down Expand Up @@ -1370,6 +1388,7 @@ def main():

AArch64Example0(),
AArch64Example0(target=Target_CortexA72),
AArch64Example0Equ(),
AArch64Example1(),
AArch64Example1(target=Target_CortexA72),
AArch64Example2(),
Expand Down
28 changes: 28 additions & 0 deletions examples/naive/aarch64/aarch64_simple0_equ.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Example kernel exercising a symbolic constant defined via .equ.
// `dist` is the offset unit used below to address four consecutive
// Q registers' worth of data at x0.
.equ dist, 16

// The labels `start` and `end` delimit the instruction sequence of this example.
start:
// Load q0 from [x1] and q1 from [x2]; individual lanes of v0/v1 are used
// as by-element multipliers in the arithmetic below.
ldr q0, [x1, #0]
ldr q1, [x2, #0]

// Load four data vectors from x0 at offsets 0, dist, 2*dist and 3*dist.
ldr q8, [x0]
ldr q9, [x0, #1*dist]
ldr q10, [x0, #2*dist]
ldr q11, [x0, #3*dist]

// First pair (v8, v9): build v24 from v9 using v0.h[0], v0.h[1] and v1.h[0],
// then replace (v8, v9) with (v8 + v24, v8 - v24).
// NOTE(review): the mul/sqrdmulh/mls sequence looks like a modular
// multiplication by a constant -- confirm against the intended algorithm.
mul v24.8h, v9.8h, v0.h[0]
sqrdmulh v9.8h, v9.8h, v0.h[1]
mls v24.8h, v9.8h, v1.h[0]
// v9 must be written before v8 here: the sub reads the old v8.
sub v9.8h, v8.8h, v24.8h
add v8.8h, v8.8h, v24.8h

// Second pair (v10, v11): same sequence, reusing v24 as the temporary.
mul v24.8h, v11.8h, v0.h[0]
sqrdmulh v11.8h, v11.8h, v0.h[1]
mls v24.8h, v11.8h, v1.h[0]
sub v11.8h, v10.8h, v24.8h
add v10.8h, v10.8h, v24.8h

// Store the results back. The first store is post-indexed and advances x0
// by 4*dist, so the remaining three stores use negative offsets relative to
// the already-advanced pointer.
str q8, [x0], #4*dist
str q9, [x0, #-3*dist]
str q10, [x0, #-2*dist]
str q11, [x0, #-1*dist]
end:
1 change: 0 additions & 1 deletion examples/naive/intt_dilithium_12_34_56_78.s
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ layer34_loop:
// the scope of our work to optimize this: We only want to demonstrate the
// ability of Helight to optimize the core loops.
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/naive/intt_n256_l6_s32_bar.s
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@ layer34_loop:

// TEMPORARY: Barrett reduction
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/naive/intt_n256_l6_s32_mont.s
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ layer34_loop:
modulus_neg .req r10
neg modulus_neg, modulus
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/naive/intt_n256_l8_s32_bar.s
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ layer34_loop:

// TEMPORARY: Barrett reduction
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/naive/intt_n256_l8_s32_mont.s
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,6 @@ layer34_loop:
modulus_neg .req r10
neg modulus_neg, modulus
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
62 changes: 62 additions & 0 deletions examples/opt/aarch64/aarch64_simple0_equ_opt_a55.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
.equ dist, 16

start:
// Instructions: 20
// Expected cycles: 28
// Expected IPC: 0.71
//
// Cycle bound: 28.0
// IPC bound: 0.71
//
// Wall time: 0.25s
// User time: 0.25s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr q0, [x1, #0] // *.............................
ldr q7, [x0, #16] // ..*...........................
ldr q13, [x2, #0] // ....*.........................
ldr q24, [x0, #48] // ......*.......................
mul v30.8H, v7.8H, v0.H[0] // ........*.....................
sqrdmulh v14.8H, v7.8H, v0.H[1] // .........*....................
sqrdmulh v27.8H, v24.8H, v0.H[1] // ..........*...................
mul v20.8H, v24.8H, v0.H[0] // ...........*..................
ldr q17, [x0] // ............*.................
mls v30.8H, v14.8H, v13.H[0] // ..............*...............
mls v20.8H, v27.8H, v13.H[0] // ...............*..............
ldr q13, [x0, #32] // ................*.............
sub v10.8H, v17.8H, v30.8H // ..................*...........
add v27.8H, v17.8H, v30.8H // ...................*..........
sub v0.8H, v13.8H, v20.8H // ....................*.........
str q10, [x0, #16] // .....................*........
add v8.8H, v13.8H, v20.8H // ......................*.......
str q0, [x0, #48] // .......................*......
str q27, [x0], #4*16 // .........................*....
str q8, [x0, #-32] // ...........................*..

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr q0, [x1, #0] // *..............................
// ldr q1, [x2, #0] // ....*..........................
// ldr q8, [x0] // ............*..................
// ldr q9, [x0, #1*16] // ..*............................
// ldr q10, [x0, #2*16] // ................*..............
// ldr q11, [x0, #3*16] // ......*........................
// mul v24.8h, v9.8h, v0.h[0] // ........*......................
// sqrdmulh v9.8h, v9.8h, v0.h[1] // .........*.....................
// mls v24.8h, v9.8h, v1.h[0] // ..............*................
// sub v9.8h, v8.8h, v24.8h // ..................*............
// add v8.8h, v8.8h, v24.8h // ...................*...........
// mul v24.8h, v11.8h, v0.h[0] // ...........*...................
// sqrdmulh v11.8h, v11.8h, v0.h[1] // ..........*....................
// mls v24.8h, v11.8h, v1.h[0] // ...............*...............
// sub v11.8h, v10.8h, v24.8h // ....................*..........
// add v10.8h, v10.8h, v24.8h // ......................*........
// str q8, [x0], #4*16 // .........................*.....
// str q9, [x0, #-3*16] // .....................*.........
// str q10, [x0, #-2*16] // ...........................*...
// str q11, [x0, #-1*16] // .......................*.......

end:
1 change: 0 additions & 1 deletion examples/opt/intt_n256_l6_s32_bar.s
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,6 @@ layer34_loop_end:

// TEMPORARY: Barrett reduction
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/opt/intt_n256_l6_s32_mont.s
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,6 @@ layer34_loop_end:
modulus_neg .req r10
neg modulus_neg, modulus
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/opt/intt_n256_l8_s32_bar.s
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,6 @@ layer34_loop_end:

// TEMPORARY: Barrett reduction
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
1 change: 0 additions & 1 deletion examples/opt/intt_n256_l8_s32_mont.s
Original file line number Diff line number Diff line change
Expand Up @@ -1190,7 +1190,6 @@ layer34_loop_end:
modulus_neg .req r10
neg modulus_neg, modulus
barrett_const .req r1
.equ const_barrett, 63
movw barrett_const, #:lower16:const_barrett
movt barrett_const, #:upper16:const_barrett
mov lr, #64
Expand Down
24 changes: 21 additions & 3 deletions slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,9 +589,14 @@ def _extract_core(source, lbl_start=None, lbl_end=None):
class AsmAllocation():
"""Helper for tracking register aliases via .req and .unreq"""

# TODO: This is conceptually different and should be
# handled in its own class.
_REGEXP_EQU_TXT = r"\s*\.equ\s+(?P<key>[A-Za-z0-9\_]+)\s*,\s*(?P<val>[A-Za-z0-9()*/+-]+)"

_REGEXP_REQ_TXT = r"\s*(?P<alias>\w+)\s+\.req\s+(?P<reg>\w+)"
_REGEXP_UNREQ_TXT = r"\s*\.unreq\s+(?P<alias>\w+)"

_REGEXP_EQU = re.compile(_REGEXP_EQU_TXT)
_REGEXP_REQ = re.compile(_REGEXP_REQ_TXT)
_REGEXP_UNREQ = re.compile(_REGEXP_UNREQ_TXT)

Expand Down Expand Up @@ -625,6 +630,12 @@ def check_allocation(line):
reg = p.group("reg")
return alias, reg

p = AsmAllocation._REGEXP_EQU.match(line.text)
if p is not None:
key = p.group("key")
val = p.group("val")
return key, val

return None

@staticmethod
Expand Down Expand Up @@ -683,10 +694,17 @@ def parse_allocs(src):
def unfold_all_aliases(aliases, src):
"""Unfold aliases in assembly source"""
def _apply_single_alias_to_line(alias_from, alias_to, src):
return re.sub(f"(\\W){alias_from}(\\W|\\Z)", f"\\1{alias_to}\\2", src)
res = re.sub(f"(\\W){alias_from}(\\W|\\Z)", f"\\g<1>{alias_to}\\2", src)
return res
def _apply_multiple_aliases_to_line(line):
for (alias_from, alias_to) in aliases.items():
line = _apply_single_alias_to_line(alias_from, alias_to, line)
do_again = True
while do_again:
do_again = False
for (alias_from, alias_to) in aliases.items():
line_new = _apply_single_alias_to_line(alias_from, alias_to, line)
if line_new != line:
do_again = True
line = line_new
return line
res = []
for line in src:
Expand Down

0 comments on commit 676a967

Please sign in to comment.