Skip to content

Commit

Permalink
Make ldm/stm range parsing more flexible.
Browse files Browse the repository at this point in the history
Currently the Armv7-M arch model is limited to register lists
consisting of a single range:
e.g., ldm r0, {r1-r7}.

This is not actually the correct restriction from the manual.
Actually, the registers don't have to be consecutive - the only
limitation is that the lowest register is loaded from the lowest address.
In other words: the registers need to be listed in ascending order.
Also, the range is merely syntactic sugar and one can also just list
all registers.

This commit changes our parsing to be closer to the real world.
It allows writing any register list, including ranges.
Internally, each range gets expanded to the actual list. Instructions
are always written out as the full list.

The main motivation for this change is that we were recently optimizing code
containing
ldm r0!, {r1-r3,r14}
which cannot be parsed with the current model. With this commit, it parses
correctly.
This does in theory give SLOTHY more freedom in choosing register
allocations, but it is yet to be seen if that is useful.

args_out_combinations can become very big in this case and we may actually have to
restrict that if we are running into performance issues later.
  • Loading branch information
mkannwischer committed Dec 9, 2024
1 parent 7ecfafa commit ce8cb55
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 75 deletions.
23 changes: 23 additions & 0 deletions examples/naive/armv7m/armv7m_simple0.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,27 @@ eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

ldm r0, {r1-r2,r14}
add r1, r2,r1
eor.w r1,r1, r14
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]


ldm r0, {r1-r3}
add r1, r2,r1
eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

ldm r0, {r1,r2,r3}
add r1, r2,r1
eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

end:
78 changes: 57 additions & 21 deletions examples/opt/armv7m/armv7m_simple0_opt_m7.s
Original file line number Diff line number Diff line change
@@ -1,24 +1,42 @@

start:
// Instructions: 6
// Expected cycles: 5
// Expected IPC: 1.20
//
// Cycle bound: 5.0
// IPC bound: 1.20
//
// Wall time: 0.01s
// User time: 0.01s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r12, [r0, #4] // *.............................
add r12,r2,r12 // .*............................
eor.w r10,r12,r3 // ..*...........................
smlabt r10,r2,r2,r10 // ..*...........................
asrs r3,r10,#1 // ....*.........................
str r3,[r0,#4] // ....*.........................
// Instructions: 24
// Expected cycles: 14
// Expected IPC: 1.71
//
// Cycle bound: 14.0
// IPC bound: 1.71
//
// Wall time: 0.39s
// User time: 0.39s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r11,[r0,#4] // *.............................
ldm r0,{r7,r9,r14} // .*............................
add r8,r2,r11 // .*............................
eor.w r11,r8,r3 // ..*...........................
smlabt r3,r2,r2,r11 // ..*...........................
add r11,r9,r7 // ....*.........................
ldm r0,{r1,r2,r7} // ....*.........................
eor.w r14,r11,r14 // .....*........................
smlabt r8,r9,r9,r14 // .....*........................
asrs r4,r3,#1 // ......*.......................
str r4,[r0,#4] // ......*.......................
add r3,r2,r1 // .......*......................
ldm r0,{r11,r12,r14} // .......*......................
eor.w r3,r3,r7 // ........*.....................
smlabt r7,r2,r2,r3 // ........*.....................
asrs r9,r8,#1 // .........*....................
str r9,[r0,#4] // .........*....................
asrs r3,r7,#1 // ..........*...................
add r7,r12,r11 // ..........*...................
eor.w r14,r7,r14 // ...........*..................
smlabt r14,r12,r12,r14 // ...........*..................
str r3,[r0,#4] // ............*.................
asrs r14,r14,#1 // .............*................
str r14,[r0,#4] // .............*................

// ------ cycle (expected) ------>
// 0 25
Expand All @@ -27,7 +45,25 @@
// add r1, r2,r1 // .*.............................
// eor.w r1,r1, r3 // ..*............................
// smlabt r3,r2, r2, r1 // ..*............................
// asrs r3, r3,#1 // ....*..........................
// str r3, [r0,#4] // ....*..........................
// asrs r3, r3,#1 // ......*........................
// str r3, [r0,#4] // ......*........................
// ldm r0, {r1-r2,r14} // .*.............................
// add r1, r2,r1 // ....*..........................
// eor.w r1,r1, r14 // .....*.........................
// smlabt r3,r2, r2, r1 // .....*.........................
// asrs r3, r3,#1 // .........*.....................
// str r3, [r0,#4] // .........*.....................
// ldm r0, {r1-r3} // .......*.......................
// add r1, r2,r1 // ..........*....................
// eor.w r1,r1, r3 // ...........*...................
// smlabt r3,r2, r2, r1 // ...........*...................
// asrs r3, r3,#1 // .............*.................
// str r3, [r0,#4] // .............*.................
// ldm r0, {r1,r2,r3} // ....*..........................
// add r1, r2,r1 // .......*.......................
// eor.w r1,r1, r3 // ........*......................
// smlabt r3,r2, r2, r1 // ........*......................
// asrs r3, r3,#1 // ..........*....................
// str r3, [r0,#4] // ............*..................

end:
133 changes: 81 additions & 52 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import inspect
import re
import math
import itertools
from enum import Enum
from functools import cache

Expand Down Expand Up @@ -125,12 +126,12 @@ def unconditional(lbl):
class VmovCmpLoop(Loop):
"""
Loop ending in a vmov, a compare, and a branch.
The modification to the value we compare against happens inside the loop
body. The value that is being compared to is stashed to a floating point
register before the loop starts and therefore needs to be recovered before
the comparison.
the comparison.
WARNING: This type of loop is experimental as slothy has no knowledge about
what happens inside the loop boundary! Especially, a register is written
inside the boundary which may be used for renaming by slothy. Use with
Expand Down Expand Up @@ -218,7 +219,7 @@ class CmpLoop(Loop):
"""
Loop ending in a compare and a branch.
The modification to the value we compare against happens inside the loop body.
WARNING: This type of loop is experimental as slothy has no knowledge about
WARNING: This type of loop is experimental as slothy has no knowledge about
what happens inside the loop boundary! Use with caution.
Example:
Expand Down Expand Up @@ -397,7 +398,7 @@ def __init__(self, *, mnemonic,
self.flag = None
self.width = None
self.barrel = None
self.range = None
self.reg_list = None

def extract_read_writes(self):
"""Extracts 'reads'/'writes' clauses from the source line of the instruction"""
Expand Down Expand Up @@ -651,7 +652,11 @@ def pattern_i(i):
index_pattern = "[0-9]+"
width_pattern = "(?:\.w|\.n|)"
barrel_pattern = "(?:lsl|ror|lsr|asr)\\\\s*"
range_pattern = "\{(?P<range_type>[rs])(?P<range_start>\\\\d+)-[rs](?P<range_end>\\\\d+)\}"

# reg_list is <range>(,<range>)*
# range is [rs]NN(-rsMM)?
range_pat = "([rs]\\\\d+)(-[rs](\\\\d+))?"
reg_list_pattern = "\{"+ range_pat + "(," + range_pat + ")*" +"\}"

src = re.sub(" ", "\\\\s+", src)
src = re.sub(",", "\\\\s*,\\\\s*", src)
Expand All @@ -662,7 +667,7 @@ def pattern_i(i):
src = replace_placeholders(src, "flag", flag_pattern, "flag") # TODO: Are any changes required for IT syntax?
src = replace_placeholders(src, "width", width_pattern, "width")
src = replace_placeholders(src, "barrel", barrel_pattern, "barrel")
src = replace_placeholders(src, "range", range_pattern, "range")
src = replace_placeholders(src, "reg_list", reg_list_pattern, "reg_list")

src = r"\s*" + src + r"\s*(//.*)?\Z"
return src
Expand Down Expand Up @@ -789,6 +794,30 @@ def _instantiate_pattern(s, ty, arg, out):
raise FatalParsingException(f"Failed to replace <{s}> by {rep} in {out}!")
return res

@staticmethod
def _expand_reg_list(reg_list):
    """Expand a register list that may contain ranges into explicit names.

    Examples:
        r1,r2,r3    -> ("r", ["r1", "r2", "r3"])
        s1-s3       -> ("s", ["s1", "s2", "s3"])
        r1-r3,r14   -> ("r", ["r1", "r2", "r3", "r14"])

    Curly braces around the list are optional, and whitespace anywhere in
    the list is ignored.

    :param reg_list: register list string, e.g. "{r1-r3,r14}"
    :return: tuple (reg_list_type, regs), where reg_list_type is the first
             character of the list (e.g. "r" or "s") and regs is the list
             of register names with every range expanded, in the order
             given.
    :raises IndexError: if the list contains no registers at all.
    :raises ValueError: if a range bound is not a register number.
    """
    # Drop the braces and all whitespace up front; otherwise a leading
    # blank would be mistaken for the register type and blanks after
    # commas would end up inside the register names.
    reg_list = reg_list.replace("{", "").replace("}", "")
    reg_list = "".join(reg_list.split())

    # The first character of the list determines the register type.
    reg_list_type = reg_list[0]
    regs = []
    for entry in reg_list.split(","):
        if "-" in entry:
            # A range <type>N-<type>M covers N..M inclusive.
            first, last = entry.split("-")[:2]
            start = int(first.replace(reg_list_type, ""))
            end = int(last.replace(reg_list_type, ""))
            regs.extend(f"{reg_list_type}{i}" for i in range(start, end + 1))
        else:
            # Not a range, just a single register.
            regs.append(entry)
    return reg_list_type, regs

@staticmethod
def build_core(obj, res):

Expand All @@ -815,10 +844,7 @@ def group_name_i(i):
group_to_attribute('flag', 'flag')
group_to_attribute('width', 'width')
group_to_attribute('barrel', 'barrel')
group_to_attribute('range', 'range')
group_to_attribute('range_start', 'range_start', int)
group_to_attribute('range_end', 'range_end', int)
group_to_attribute('range_type', 'range_type')
group_to_attribute('reg_list', 'reg_list')

for s, ty in obj.pattern_inputs:
if ty == RegisterType.FLAGS:
Expand Down Expand Up @@ -891,7 +917,7 @@ def t_default(x):
out = replace_pattern(out, "index", "index", str)
out = replace_pattern(out, "width", "width", lambda x: x.lower())
out = replace_pattern(out, "barrel", "barrel", lambda x: x.lower())
out = replace_pattern(out, "range", "range", lambda x: x.lower())
out = replace_pattern(out, "reg_list", "reg_list", lambda x: x.lower())

out = out.replace("\\[", "[")
out = out.replace("\\]", "]")
Expand Down Expand Up @@ -1417,53 +1443,53 @@ def make(cls, src):
return obj

class ldm_interval(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name
pattern = "ldm<width> <Ra>,<range>"
pattern = "ldm<width> <Ra>,<reg_list>"
inputs = ["Ra"]
outputs = []

def write(self):
reg_from = self.args_out[0]
reg_to = self.args_out[-1]
self.range = f"{{{reg_from}-{reg_to}}}"
regs = ",".join(self.args_out)
self.reg_list = f"{{{regs}}}"
return super().write()


@classmethod
def make(cls, src):
obj = Armv7mLoadInstruction.build(cls, src)
reg_type = Armv7mInstruction._infer_register_type(obj.range_type)
num_regs = len(RegisterType.list_registers(reg_type))
obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads
obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)]
reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list)

obj.args_out = reg_list
obj.num_out = len(obj.args_out)
obj.arg_types_out = [RegisterType.GPR] * obj.num_out
obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ]
obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )]
available_regs = RegisterType.list_registers(RegisterType.GPR)
obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])]
obj.args_out_restrictions = [ None for _ in range(obj.num_out) ]
return obj

class ldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name
pattern = "ldm<width> <Ra>!,<range>"
pattern = "ldm<width> <Ra>!,<reg_list>"
in_outs = ["Ra"]
outputs = []

def write(self):
reg_from = self.args_out[0]
reg_to = self.args_out[-1]
self.range = f"{{{reg_from}-{reg_to}}}"
regs = ",".join(self.args_out)
self.reg_list = f"{{{regs}}}"
return super().write()


@classmethod
def make(cls, src):
obj = Armv7mLoadInstruction.build(cls, src)
reg_type = Armv7mInstruction._infer_register_type(obj.range_type)
num_regs = len(RegisterType.list_registers(reg_type))
obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads
obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)]
reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list)

obj.args_out = reg_list
obj.num_out = len(obj.args_out)
obj.arg_types_out = [RegisterType.GPR] * obj.num_out
obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ]
obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )]
obj.increment = obj.num_out * 4

available_regs = RegisterType.list_registers(RegisterType.GPR)
obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])]
obj.args_out_restrictions = [ None for _ in range(obj.num_out) ]
return obj

class vldr_with_imm(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name
Expand Down Expand Up @@ -1496,27 +1522,28 @@ def make(cls, src):
return obj

class vldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name
pattern = "vldm<width> <Ra>!,<range>"
pattern = "vldm<width> <Ra>!,<reg_list>"
in_outs = ["Ra"]
outputs = []
def write(self):
reg_from = self.args_out[0]
reg_to = self.args_out[-1]
self.range = f"{{{reg_from}-{reg_to}}}"
regs = ",".join(self.args_out)
self.reg_list = f"{{{regs}}}"
return super().write()


@classmethod
def make(cls, src):
obj = Armv7mLoadInstruction.build(cls, src)
reg_type = Armv7mInstruction._infer_register_type(obj.range_type)
num_regs = len(RegisterType.list_registers(reg_type))
obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads
obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)]
reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list)

obj.args_out = reg_list
obj.num_out = len(obj.args_out)
obj.arg_types_out = [RegisterType.FPR] * obj.num_out
obj.args_out_restrictions = [[ f"s{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ]
obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"s{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )]
obj.increment = obj.num_out * 4

available_regs = RegisterType.list_registers(RegisterType.FPR)
obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])]
obj.args_out_restrictions = [ None for _ in range(obj.num_out) ]
return obj
# Store

Expand Down Expand Up @@ -1611,27 +1638,29 @@ def make(cls, src):
return obj

class stm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name
pattern = "stm<width> <Ra>!,<range>"
pattern = "stm<width> <Ra>!,<reg_list>"
in_outs = ["Ra"]
outputs = []

def write(self):
    """Render the stm instruction with its full, explicit register list.

    The registers stored by stm are inputs of the instruction: make()
    places the expanded register list in ``args_in``, and the class
    declares no outputs. The list is therefore rebuilt from ``args_in``;
    building it from ``args_out`` would give an empty list ``{}``.
    """
    regs = ",".join(self.args_in)
    # Doubled braces in the f-string emit the literal '{' and '}' that
    # delimit the register list.
    self.reg_list = f"{{{regs}}}"
    return super().write()

@classmethod
def make(cls, src):
obj = Armv7mLoadInstruction.build(cls, src)
reg_type = Armv7mInstruction._infer_register_type(obj.range_type)
num_regs = len(RegisterType.list_registers(reg_type))
obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads
obj.args_in = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)]

reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list)

obj.args_in = reg_list
obj.num_in = len(obj.args_in)
obj.arg_types_in = [RegisterType.GPR] * obj.num_in
obj.args_in_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_in)] for i in range(0, obj.num_in) ]
obj.args_in_combinations = [ ( list(range(0, obj.num_in)), [ [ f"r{i+j}" for i in range(0, obj.num_in)] for j in range(0, num_regs-obj.num_in) ] )]
obj.increment = obj.num_in * 4

available_regs = RegisterType.list_registers(RegisterType.GPR)
obj.args_in_combinations = [ (list(range(0, obj.num_in)), [list(a) for a in itertools.combinations(available_regs, obj.num_in)])]
obj.args_in_restrictions = [ None for _ in range(obj.num_in) ]
return obj
# Other
class cmp(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name
Expand Down
Loading

0 comments on commit ce8cb55

Please sign in to comment.