Skip to content

Commit

Permalink
Merge branch 'main' into armv7m
Browse files Browse the repository at this point in the history
  • Loading branch information
mkannwischer committed Dec 4, 2024
2 parents 4bf09ed + 76f3043 commit 088f250
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 24 deletions.
81 changes: 81 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,11 +748,89 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
infile += f"_{var}"
name += f"_{target_label_dict[target]}"

super().__init__(infile, name, rename=True, arch=arch, target=target)
def core(self,slothy):
slothy.config.allow_useless_instructions = True
slothy.fusion_region("start", "end", ssa=False)

class Armv7mExample0(Example):
    """Armv7-M example built around the ``armv7m_simple0`` source.

    This class used to be defined twice in a row, the first copy ending in a
    ``core`` method without a body (a syntax error, apparently left behind by
    a merge). Only the single, complete definition is kept.
    """

    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
        """Derive the input-file and example names and forward them to ``Example``.

        :param var: Optional variant tag; when non-empty, ``_<var>`` is
            appended to both the input-file name and the example name.
        :param arch: Architecture model handed through to ``Example``.
        :param target: Target model handed through to ``Example``; it is also
            the key used to look up the name suffix in ``target_label_dict``.
        """
        name = "armv7m_simple0"
        infile = name

        if var != "":
            name += f"_{var}"
            infile += f"_{var}"
        # Only the example name carries the target label, not the input file.
        name += f"_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target)

    def core(self, slothy):
        """Configure ``slothy`` and process the region between ``start`` and ``end``.

        :param slothy: Object exposing ``config`` and ``fusion_region``.
        """
        slothy.config.allow_useless_instructions = True
        slothy.fusion_region("start", "end", ssa=False)

class Armv7mLoopSubs(Example):
    """Armv7-M example built around the ``loop_subs`` source."""

    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
        """Build the input-file and example names and pass them to ``Example``.

        A non-empty ``var`` adds ``_<var>`` to both names; the label looked up
        in ``target_label_dict`` for ``target`` is appended to the example
        name only.
        """
        base = "loop_subs"
        variant = f"_{var}" if var != "" else ""
        source_name = base + variant
        example_name = source_name + f"_{target_label_dict[target]}"

        super().__init__(
            source_name, example_name, rename=True, arch=arch, target=target
        )

    def core(self, slothy):
        """Enable ``variable_size`` and optimize the loop labelled ``start``."""
        cfg = slothy.config
        cfg.variable_size = True
        slothy.optimize_loop("start")

class Armv7mLoopCmp(Example):
    """Armv7-M example built around the ``loop_cmp`` source."""

    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
        """Build the input-file and example names and pass them to ``Example``.

        A non-empty ``var`` adds ``_<var>`` to both names; the label looked up
        in ``target_label_dict`` for ``target`` is appended to the example
        name only.
        """
        base = "loop_cmp"
        variant = f"_{var}" if var != "" else ""
        source_name = base + variant
        example_name = source_name + f"_{target_label_dict[target]}"

        super().__init__(
            source_name, example_name, rename=True, arch=arch, target=target
        )

    def core(self, slothy):
        """Enable ``variable_size``, declare ``r6`` as output, optimize loop ``start``."""
        cfg = slothy.config
        cfg.variable_size = True
        cfg.outputs = ["r6"]
        slothy.optimize_loop("start")

class Armv7mLoopVmovCmp(Example):
    """Armv7-M example built around the ``loop_vmov_cmp`` source."""

    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
        """Build the input-file and example names and pass them to ``Example``.

        A non-empty ``var`` adds ``_<var>`` to both names; the label looked up
        in ``target_label_dict`` for ``target`` is appended to the example
        name only.
        """
        base = "loop_vmov_cmp"
        variant = f"_{var}" if var != "" else ""
        source_name = base + variant
        example_name = source_name + f"_{target_label_dict[target]}"

        super().__init__(
            source_name, example_name, rename=True, arch=arch, target=target
        )

    def core(self, slothy):
        """Enable ``variable_size``, declare ``r6`` as output, optimize loop ``start``."""
        cfg = slothy.config
        cfg.variable_size = True
        cfg.outputs = ["r6"]
        slothy.optimize_loop("start")

class ntt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
Expand Down Expand Up @@ -2690,12 +2768,15 @@ def main():
AArch64Example2(target=Target_CortexA72),

AArch64Split0(),
# Armv7m examples
Armv7mExample0(),

# Loop examples
AArch64LoopSubs(),
LoopLe(),
Armv7mLoopSubs(),
Armv7mLoopCmp(),
Armv7mLoopVmovCmp(),

CRT(),

Expand Down
9 changes: 9 additions & 0 deletions examples/naive/armv7m/armv7m_simple0.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

// Straight-line kernel delimited by the `start` and `end` labels.
// Reads one word at [r0 + 4], combines it with r2 and r3, and writes the
// result back to the same address.
start:
ldr r1, [r0, #4]       // r1 = word loaded from [r0 + 4]
add r1, r2, r1         // r1 = r2 + r1
eor.w r1, r1, r3       // r1 = r1 XOR r3 (.w selects the 32-bit encoding)
smlabt r3, r2, r2, r1  // r3 = r1 + signed(bottom half of r2) * signed(top half of r2)
asrs r3, r3, #1        // r3 = r3 arithmetically shifted right by 1; flags updated
str r3, [r0, #4]       // store r3 back to [r0 + 4]
end:
33 changes: 33 additions & 0 deletions examples/opt/armv7m/armv7m_simple0_opt_m7.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

start:
// Instructions: 6
// Expected cycles: 5
// Expected IPC: 1.20
//
// Cycle bound: 5.0
// IPC bound: 1.20
//
// Wall time: 0.02s
// User time: 0.02s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r6, [r0, #4] // *.............................
add r6, r2, r6 // .*............................
eor.w r3, r6, r3 // ..*...........................
smlabt r12, r2, r2, r3 // ..*...........................
asrs r3, r12, #1 // ....*.........................
str r3, [r0, #4] // ....*.........................

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2, r1 // .*.............................
// eor.w r1, r1, r3 // ..*............................
// smlabt r3, r2, r2, r1 // ..*............................
// asrs r3, r3, #1 // ....*..........................
// str r3, [r0, #4] // ....*..........................

end:
29 changes: 29 additions & 0 deletions examples/opt/armv7m/loop_cmp_opt_m7.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* For example, r5 represents an address where we will stop iterating and r6 is
the actual pointer which is incremented inside the loop. */

mov.w r6, #0
add.w r5, r6, #64

1:
// Instructions: 1
// Expected cycles: 1
// Expected IPC: 1.00
//
// Cycle bound: 1.0
// IPC bound: 1.00
//
// Wall time: 0.02s
// User time: 0.02s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
add r6, r6, #4 // *.............................

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// add r6, r6, #4 // *..............................

cmp r6, r5
bne 1b
11 changes: 11 additions & 0 deletions examples/opt/armv7m/loop_subs_opt_m7.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
movw r5, #16
start:
// Instructions: 0
// Expected cycles: 0
// Expected IPC: 0.00
//
// Wall time: 0.00s
// User time: 0.00s
//
subs r5, #1
bne start
31 changes: 31 additions & 0 deletions examples/opt/armv7m/loop_vmov_cmp_opt_m7.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* For example, r5 represents an address where we will stop iterating and r6 is
the actual pointer which is incremented inside the loop. */

mov.w r6, #0
add.w r5, r6, #64
vmov s0, r5

start:
// Instructions: 1
// Expected cycles: 1
// Expected IPC: 1.00
//
// Cycle bound: 1.0
// IPC bound: 1.00
//
// Wall time: 0.02s
// User time: 0.02s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
add r6, r6, #4 // *.............................

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// add r6, r6, #4 // *..............................

vmov r5, s0
cmp r6, r5
bne start
8 changes: 8 additions & 0 deletions slothy/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ def unsafe_address_offset_fixup(self):
str instructions with increment reordered with instructions depending
on the address register).
By default, this is enabled for backwards compatibility.
LIMITATION: For historical reasons, this feature cannot be disabled for
the Armv8.1-M architecture model. A refactoring of that model is needed
to make address offset fixup configurable.
Note: The user-imposed safety constraint is not a necessity -- in principle,
SLOTHY could detect when it is safe to reorder ldr/str instructions with increment.
It just hasn't been implemented yet.
Expand Down Expand Up @@ -1291,6 +1297,8 @@ def allow_useless_instructions(self,val):
self._allow_useless_instructions = val
@unsafe_address_offset_fixup.setter
def unsafe_address_offset_fixup(self,val):
    """Store the unsafe-address-offset-fixup setting.

    Raises ``InvalidConfig`` when ``val`` is exactly ``False`` while the
    configured architecture reports the name ``"Arm_v81M"``.
    """
    wants_disable = val is False
    # The architecture name is consulted only when disabling is requested.
    if wants_disable and self.arch.arch_name == "Arm_v81M":
        raise InvalidConfig("unsafe address offset fixup must be set for Armv8.1-M")
    self._unsafe_address_offset_fixup = val
@locked_registers.setter
def locked_registers(self,val):
Expand Down
5 changes: 3 additions & 2 deletions slothy/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,8 +1483,6 @@ def optimize(self, source, prefix_len=0, suffix_len=0, log_model=None, retry=Fal
self.result.success = self._solve()
self.result.valid = True

# - Export (optional)
self._export_model()

if not retry and self.success:
self.logger.info("Booleans in result: %d", self._model.cp_solver.NumBooleans())
Expand Down Expand Up @@ -3450,6 +3448,9 @@ def is_good_enough( cur, bound ):

ok = self._model.cp_model.status in [cp_model.FEASIBLE, cp_model.OPTIMAL]

# - Export (optional)
self._export_model()

if ok:
# Remember solution in case we want to retry with an(other) objective
self._model.cp_model.ClearHints()
Expand Down
4 changes: 3 additions & 1 deletion slothy/core/slothy.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,9 @@ def fusion_loop(self, loop_lbl, **kwargs):
assert SourceLine.is_source(self.source)

def optimize_loop(self, loop_lbl, postamble_label=None):
"""Optimize the loop starting at a given label"""
"""Optimize the loop starting at a given label
The postamble_label marks the end of the loop kernel.
"""

logger = self.logger.getChild(loop_lbl)

Expand Down
8 changes: 7 additions & 1 deletion slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,11 +1222,11 @@ def _extract(self, source, lbl):
pre = []
body = []
post = []
# candidate lines for the end of the loop
loop_end_candidates = []
loop_lbl_regexp_txt = self.lbl_regex
loop_lbl_regexp = re.compile(loop_lbl_regexp_txt)

# TODO: Allow other forms of looping
# end_regex shall contain group cnt as the counter variable
loop_end_regexp_txt = self.end_regex
loop_end_regexp = [re.compile(txt) for txt in loop_end_regexp_txt]
Expand Down Expand Up @@ -1255,6 +1255,7 @@ def _extract(self, source, lbl):
if state == 1:
p = loop_end_regexp[loop_end_ctr].match(l_str)
if p is not None:
# Case: We may have encountered part of the loop end
# collect all named groups
self.additional_data = self.additional_data | p.groupdict()
loop_end_ctr += 1
Expand All @@ -1263,6 +1264,11 @@ def _extract(self, source, lbl):
state = 2
continue
elif loop_end_ctr > 0 and l_str != "":
# Case: The sequence of loop end candidates was interrupted
# i.e., we found a false-positive or this is not a proper loop

# The loop end candidates are not part of the loop, meaning
# they belonged to the body
body += loop_end_candidates
self.additional_data = {}
loop_end_ctr = 0
Expand Down
Loading

0 comments on commit 088f250

Please sign in to comment.