Skip to content

Commit

Permalink
Benchmarking on A55 (#84)
Browse files Browse the repository at this point in the history
* add exec_wrapper for tests script

Signed-off-by: Thing-han, Lim <[email protected]>

* add ci benchmark on a55 runner

Signed-off-by: Thing-han, Lim <[email protected]>

* fix if condition for the benchmark workflow

Signed-off-by: Thing-han, Lim <[email protected]>

* make parsing of results more robust

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* log cmd on failure

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* remove taskpolicy and replace by exec_wrapper

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* refactor benchmarking yml

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* fix exec wrapper

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* add name of job

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* always turn exec wrapper into a list

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* remove duplicate test script

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* move splitting of exec wrapper

Signed-off-by: Matthias J. Kannwischer <[email protected]>

---------

Signed-off-by: Thing-han, Lim <[email protected]>
Signed-off-by: Matthias J. Kannwischer <[email protected]>
Co-authored-by: Matthias J. Kannwischer <[email protected]>
  • Loading branch information
potsrevennil and mkannwischer authored Jul 2, 2024
1 parent 124e510 commit 41237b3
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 71 deletions.
19 changes: 15 additions & 4 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,18 @@ on:
types: [ "labeled" ]
jobs:
bench:
runs-on: self-hosted-rpi4
name: ${{ matrix.target.name }}
strategy:
fail-fast: true
matrix:
target:
- system: rpi4
name: Arm Cortex-A72 (Raspberry Pi 4) benchmarks
cmd: tests bench -c PMU --cflags -mcpu=cortex-a72 -v --output output.json
- system: a55
name: Arm Cortex-A55 (Snapdragon 888) benchmarks
cmd: tests bench -c PERF --cflags "-static -mcpu=cortex-a55" --arch-flags -march=armv8.2-a -w exec-on-a55 -v --output output.json
runs-on: self-hosted-${{ matrix.target.system }}
permissions:
contents: write
if: github.repository_owner == 'pq-code-package' && (github.event.label.name == 'benchmark' || github.ref == 'refs/heads/main')
Expand All @@ -27,7 +38,7 @@ jobs:
- $(uname -a)
- $(nix --version)
- $(astyle --version)
- $(${{ matrix.cross_prefix }}gcc --version | grep -m1 "")
- $(${{ matrix.target.cross_prefix }}gcc --version | grep -m1 "")
- $(bash --version | grep -m1 "")
## CPU Info
Expand All @@ -36,12 +47,12 @@ jobs:
- name: Run benchmark
shell: nix develop .#ci -c bash -e {0}
run: |
tests bench -c PMU --cflags -mcpu=cortex-a72 -v --output output.json
${{ matrix.target.cmd }}
- name: Store benchmark result
if: github.repository_owner == 'pq-code-package' && github.ref == 'refs/heads/main'
uses: benchmark-action/github-action-benchmark@v1
with:
name: Arm Cortex-A72 (Raspberry Pi 4) benchmarks
name: ${{ matrix.target.name }}
tool: 'customSmallerIsBetter'
output-file-path: output.json
github-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
147 changes: 80 additions & 67 deletions scripts/tests
Original file line number Diff line number Diff line change
Expand Up @@ -25,90 +25,82 @@ def sha256sum(result):
return m.hexdigest()


def base_run(
def base_compile(
bin,
force_qemu,
verbose,
run_as_root=False,
mac_taskpolicy=None,
extra_make_envs={},
extra_make_args=[],
):
"""compile or cross compile with some extra environment variables and makefile arguments"""

def dict2str(dict):
s = ""
for k, v in dict.items():
s += f"{k}={v} "
return s

if force_qemu or (platform.system() == "Linux" and platform.machine() == "x86_64"):
logging.debug(f"Emulating {bin} with QEMU")
if platform.system() == "Linux" and platform.machine() == "x86_64":
logging.debug(f"Cross compiling {bin}")

args = [
"make",
"CROSS_PREFIX=aarch64-none-linux-gnu-",
f"{bin}",
] + extra_make_args

logging.info(dict2str(extra_make_envs) + " ".join(args))

p = subprocess.run(
args,
stdout=subprocess.DEVNULL if not verbose else None,
env=os.environ.copy() | extra_make_envs,
)
if p.returncode != 0:
logging.error(f"make failed: {p.returncode}")
sys.exit(1)

result = subprocess.run(
["qemu-aarch64", f"{bin}"],
capture_output=True,
universal_newlines=False,
)

if result.returncode != 0:
logging.error(
f"Emulating {bin} failed: {result.returncode} {result.stderr.decode()}"
)
sys.exit(1)

else:
logging.debug(f"Running {bin} natively")
logging.debug(f"Compiling {bin} natively")

args = ["make", f"{bin}"] + extra_make_args
logging.info(dict2str(extra_make_envs) + " ".join(args))

p = subprocess.run(
args,
stdout=subprocess.DEVNULL if not verbose else None,
env=os.environ.copy() | extra_make_envs,
)
p = subprocess.run(
args,
stdout=subprocess.DEVNULL if not verbose else None,
env=os.environ.copy() | extra_make_envs,
)

if p.returncode != 0:
logging.error(f"make failed: {p.returncode}")
sys.exit(1)
if p.returncode != 0:
logging.error(f"make failed: {p.returncode}")
sys.exit(1)

cmd = [f"./{bin}"]
if run_as_root:
logging.info(
"Running benchmarks as root -- you may need to enter your root password."
)
cmd = ["sudo"] + cmd

if mac_taskpolicy is not None:
cmd = ["taskpolicy", "-c", mac_taskpolicy] + cmd
def base_run(
bin,
force_qemu,
verbose,
run_as_root=False,
exec_wrapper=None,
):
"""Run the binary in all different ways"""
cmd = [f"./{bin}"]
if force_qemu or (platform.system() == "Linux" and platform.machine() == "x86_64"):
logging.info(f"Emulating {bin} with QEMU")
cmd = ["qemu-aarch64"] + cmd

result = subprocess.run(
cmd,
capture_output=True,
universal_newlines=False,
if run_as_root:
logging.info(
f"Running {bin} as root -- you may need to enter your root password."
)
cmd = ["sudo"] + cmd

if result.returncode != 0:
logging.error(
f"Running {bin} natively failed: {result.returncode} {result.stderr.decode()}"
)
sys.exit(1)
if exec_wrapper:
logging.info(f"Running {bin} with customized wrapper.")
exec_wrapper = exec_wrapper.split(" ")
cmd = exec_wrapper + cmd

logging.info(" ".join(cmd))
result = subprocess.run(
cmd,
capture_output=True,
universal_newlines=False,
)

if result.returncode != 0:
logging.error(
f"Running '{cmd}' failed: {result.returncode} {result.stderr.decode()}"
)
sys.exit(1)

return result.stdout

Expand Down Expand Up @@ -149,7 +141,7 @@ def test_schemes(
force_qemu,
verbose,
run_as_root=False,
mac_taskpolicy=None,
exec_wrapper=None,
extra_make_envs={},
extra_make_args=[],
):
Expand All @@ -165,14 +157,13 @@ def test_schemes(
results = {}
for scheme in SCHEME:
bin = scheme2file(scheme)
base_compile(bin, verbose, extra_make_envs, extra_make_args)
result = base_run(
bin,
force_qemu,
verbose,
run_as_root,
mac_taskpolicy,
extra_make_envs,
extra_make_args,
exec_wrapper,
)
results[scheme] = result

Expand Down Expand Up @@ -279,11 +270,11 @@ def add_options(options):
def run(bin, force_qemu, verbose, cflags, arch_flags):
config_logger(verbose)

base_compile(bin, verbose, process_make_envs(cflags, arch_flags))
result = base_run(
bin,
force_qemu,
verbose,
process_make_envs(cflags, arch_flags),
)
logging.info(str(result, encoding="utf-8"))

Expand Down Expand Up @@ -388,20 +379,41 @@ def kat(force_qemu, verbose, cflags, arch_flags):
type=bool,
help="Benchmarking binary is run with sudo.",
)
@click.option(
"-w",
"--exec-wrapper",
help="Run the benchmark binary with the user-customized wrapper.",
)
@click.option(
"-t",
"--mac-taskpolicy",
nargs=1,
type=click.Choice(["utility", "background", "maintenance"]),
hidden=platform.system() != "Darwin",
show_default=True,
default=None,
help="Run the program using the specified QoS clamp. Applies to MacOS only. Setting this flag to 'background' guarantees running on E-cores.",
help="Run the program using the specified QoS clamp. Applies to MacOS only. Setting this flag to 'background' guarantees running on E-cores. This is an abbreviation of --exec-wrapper 'taskpolicy -c {mac_taskpolicy}'.",
)
def bench(
force_qemu, verbose, cycles, cflags, arch_flags, output, run_as_root, mac_taskpolicy
force_qemu,
verbose,
cycles,
cflags,
arch_flags,
output,
run_as_root,
exec_wrapper,
mac_taskpolicy,
):
config_logger(verbose)

if mac_taskpolicy:
if exec_wrapper:
logging.error(f"cannot set both --mac-taskpolicy and --exec-wrapper")
sys.exit(1)
else:
exec_wrapper = f"taskpolicy -c {mac_taskpolicy}"

results = test_schemes(
"benchmark",
lambda scheme: scheme.name.replace("MLKEM", "test/bin/bench_kyber"),
Expand All @@ -411,7 +423,7 @@ def bench(
force_qemu,
verbose,
run_as_root,
mac_taskpolicy,
exec_wrapper,
process_make_envs(cflags, arch_flags),
[f"CYCLES={cycles}"],
)
Expand All @@ -430,10 +442,11 @@ def bench(
# encaps cycles=X
# decaps cycles=X

d = {
k: int(v)
for k, v in (l.decode().split("=") for l in r.splitlines()[:3])
}
lines = [
line.decode() for line in r.splitlines() if "=" in line.decode()
]

d = {k: int(v) for k, v in (l.split("=") for l in lines)}
for primitive in ["keypair", "encaps", "decaps"]:
v.append(
{
Expand Down

0 comments on commit 41237b3

Please sign in to comment.