Benchmark on several self-hosted runners; split compile and run in scripts/tests

- .github/workflows/bench.yml: run the bench job as a matrix over the
  self-hosted rpi4 and a55 runners; each matrix target supplies its own
  display name and benchmark command line.
- scripts/tests: split the old base_run into base_compile (invokes make)
  and base_run (executes the binary, optionally under qemu-aarch64, sudo
  and/or a user-supplied wrapper); add -w/--exec-wrapper and turn
  -t/--mac-taskpolicy into a shorthand for it; parse every "key=value"
  line of the benchmark output instead of only the first three lines.

Review fixes folded into this revision of the patch:
- base_compile logs the make command line for cross builds too (the
  logging call used to stay inside the native-only else branch).
- the wrapper string is split on arbitrary whitespace, so repeated
  spaces no longer produce empty argv entries.
- the run-failure message prints the command as a string, not a list repr.
- dropped an f-prefix from a log message that has no placeholders.

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 04adcbe35..5f903415d 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -9,7 +9,18 @@ on:
     types: [ "labeled" ]
 jobs:
   bench:
-    runs-on: self-hosted-rpi4
+    name: ${{ matrix.target.name }}
+    strategy:
+      fail-fast: true
+      matrix:
+        target:
+          - system: rpi4
+            name: Arm Cortex-A72 (Raspberry Pi 4) benchmarks
+            cmd: tests bench -c PMU --cflags -mcpu=cortex-a72 -v --output output.json
+          - system: a55
+            name: Arm Cortex-A55 (Snapdragon 888) benchmarks
+            cmd: tests bench -c PERF --cflags "-static -mcpu=cortex-a55" --arch-flags -march=armv8.2-a -w exec-on-a55 -v --output output.json
+    runs-on: self-hosted-${{ matrix.target.system }}
     permissions:
       contents: write
     if: github.repository_owner == 'pq-code-package' && (github.event.label.name == 'benchmark' || github.ref == 'refs/heads/main')
@@ -27,7 +38,7 @@ jobs:
           - $(uname -a)
           - $(nix --version)
           - $(astyle --version)
-          - $(${{ matrix.cross_prefix }}gcc --version | grep -m1 "")
+          - $(${{ matrix.target.cross_prefix }}gcc --version | grep -m1 "")
           - $(bash --version | grep -m1 "")
 
           ## CPU Info
@@ -36,12 +47,12 @@ jobs:
       - name: Run benchmark
         shell: nix develop .#ci -c bash -e {0}
         run: |
-          tests bench -c PMU --cflags -mcpu=cortex-a72 -v --output output.json
+          ${{ matrix.target.cmd }}
       - name: Store benchmark result
         if: github.repository_owner == 'pq-code-package' && github.ref == 'refs/heads/main'
         uses: benchmark-action/github-action-benchmark@v1
         with:
-          name: Arm Cortex-A72 (Raspberry Pi 4) benchmarks
+          name: ${{ matrix.target.name }}
           tool: 'customSmallerIsBetter'
           output-file-path: output.json
           github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/scripts/tests b/scripts/tests
index 7e8b7ad5c..63019bb30 100755
--- a/scripts/tests
+++ b/scripts/tests
@@ -25,23 +25,22 @@ def sha256sum(result):
     return m.hexdigest()
 
 
-def base_run(
+def base_compile(
     bin,
-    force_qemu,
     verbose,
-    run_as_root=False,
-    mac_taskpolicy=None,
     extra_make_envs={},
     extra_make_args=[],
 ):
+    """Build `bin` with make (cross-compiling on x86_64 Linux), passing the extra environment variables and make arguments; exit on failure."""
+
     def dict2str(dict):
         s = ""
         for k, v in dict.items():
             s += f"{k}={v} "
         return s
 
-    if force_qemu or (platform.system() == "Linux" and platform.machine() == "x86_64"):
-        logging.debug(f"Emulating {bin} with QEMU")
+    if platform.system() == "Linux" and platform.machine() == "x86_64":
+        logging.debug(f"Cross compiling {bin}")
 
         args = [
             "make",
@@ -49,66 +48,59 @@ def base_run(
             f"{bin}",
         ] + extra_make_args
 
-        logging.info(dict2str(extra_make_envs) + " ".join(args))
-
-        p = subprocess.run(
-            args,
-            stdout=subprocess.DEVNULL if not verbose else None,
-            env=os.environ.copy() | extra_make_envs,
-        )
-        if p.returncode != 0:
-            logging.error(f"make failed: {p.returncode}")
-            sys.exit(1)
-
-        result = subprocess.run(
-            ["qemu-aarch64", f"{bin}"],
-            capture_output=True,
-            universal_newlines=False,
-        )
-
-        if result.returncode != 0:
-            logging.error(
-                f"Emulating {bin} failed: {result.returncode} {result.stderr.decode()}"
-            )
-            sys.exit(1)
     else:
-        logging.debug(f"Running {bin} natively")
+        logging.debug(f"Compiling {bin} natively")
 
         args = ["make", f"{bin}"] + extra_make_args
 
-        logging.info(dict2str(extra_make_envs) + " ".join(args))
+    logging.info(dict2str(extra_make_envs) + " ".join(args))
 
-        p = subprocess.run(
-            args,
-            stdout=subprocess.DEVNULL if not verbose else None,
-            env=os.environ.copy() | extra_make_envs,
-        )
+    p = subprocess.run(
+        args,
+        stdout=subprocess.DEVNULL if not verbose else None,
+        env=os.environ.copy() | extra_make_envs,
+    )
 
-        if p.returncode != 0:
-            logging.error(f"make failed: {p.returncode}")
-            sys.exit(1)
+    if p.returncode != 0:
+        logging.error(f"make failed: {p.returncode}")
+        sys.exit(1)
 
-        cmd = [f"./{bin}"]
-        if run_as_root:
-            logging.info(
-                "Running benchmarks as root -- you may need to enter your root password."
-            )
-            cmd = ["sudo"] + cmd
-        if mac_taskpolicy is not None:
-            cmd = ["taskpolicy", "-c", mac_taskpolicy] + cmd
+def base_run(
+    bin,
+    force_qemu,
+    verbose,
+    run_as_root=False,
+    exec_wrapper=None,
+):
+    """Run `bin` (prefixed with qemu-aarch64, sudo and/or `exec_wrapper` as requested) and return its stdout; exit on failure."""
+    cmd = [f"./{bin}"]
+    if force_qemu or (platform.system() == "Linux" and platform.machine() == "x86_64"):
+        logging.info(f"Emulating {bin} with QEMU")
+        cmd = ["qemu-aarch64"] + cmd
 
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            universal_newlines=False,
+    if run_as_root:
+        logging.info(
+            f"Running {bin} as root -- you may need to enter your root password."
         )
+        cmd = ["sudo"] + cmd
 
-        if result.returncode != 0:
-            logging.error(
-                f"Running {bin} natively failed: {result.returncode} {result.stderr.decode()}"
-            )
-            sys.exit(1)
+    if exec_wrapper:
+        logging.info(f"Running {bin} with customized wrapper.")
+        exec_wrapper = exec_wrapper.split()
+        cmd = exec_wrapper + cmd
+
+    logging.info(" ".join(cmd))
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        universal_newlines=False,
+    )
+
+    if result.returncode != 0:
+        logging.error(
+            f"Running '{' '.join(cmd)}' failed: {result.returncode} {result.stderr.decode()}"
+        )
+        sys.exit(1)
 
     return result.stdout
 
@@ -149,7 +141,7 @@ def test_schemes(
     force_qemu,
     verbose,
     run_as_root=False,
-    mac_taskpolicy=None,
+    exec_wrapper=None,
     extra_make_envs={},
     extra_make_args=[],
 ):
@@ -165,14 +157,13 @@ def test_schemes(
     results = {}
     for scheme in SCHEME:
         bin = scheme2file(scheme)
+        base_compile(bin, verbose, extra_make_envs, extra_make_args)
         result = base_run(
             bin,
             force_qemu,
             verbose,
             run_as_root,
-            mac_taskpolicy,
-            extra_make_envs,
-            extra_make_args,
+            exec_wrapper,
         )
         results[scheme] = result
 
@@ -279,11 +270,11 @@ def add_options(options):
 def run(bin, force_qemu, verbose, cflags, arch_flags):
     config_logger(verbose)
 
+    base_compile(bin, verbose, process_make_envs(cflags, arch_flags))
     result = base_run(
         bin,
         force_qemu,
         verbose,
-        process_make_envs(cflags, arch_flags),
     )
 
     logging.info(str(result, encoding="utf-8"))
@@ -388,20 +379,41 @@ def kat(force_qemu, verbose, cflags, arch_flags):
     type=bool,
     help="Benchmarking binary is run with sudo.",
 )
+@click.option(
+    "-w",
+    "--exec-wrapper",
+    help="Run the benchmark binary with the user-customized wrapper.",
+)
 @click.option(
     "-t",
     "--mac-taskpolicy",
     nargs=1,
     type=click.Choice(["utility", "background", "maintenance"]),
+    hidden=platform.system() != "Darwin",
     show_default=True,
     default=None,
-    help="Run the program using the specified QoS clamp. Applies to MacOS only. Setting this flag to 'background' guarantees running on E-cores.",
+    help="Run the program using the specified QoS clamp. Applies to MacOS only. Setting this flag to 'background' guarantees running on E-cores. This is an abbreviation of --exec-wrapper 'taskpolicy -c {mac_taskpolicy}'.",
 )
 def bench(
-    force_qemu, verbose, cycles, cflags, arch_flags, output, run_as_root, mac_taskpolicy
+    force_qemu,
+    verbose,
+    cycles,
+    cflags,
+    arch_flags,
+    output,
+    run_as_root,
+    exec_wrapper,
+    mac_taskpolicy,
 ):
     config_logger(verbose)
 
+    if mac_taskpolicy:
+        if exec_wrapper:
+            logging.error("cannot set both --mac-taskpolicy and --exec-wrapper")
+            sys.exit(1)
+        else:
+            exec_wrapper = f"taskpolicy -c {mac_taskpolicy}"
+
     results = test_schemes(
         "benchmark",
         lambda scheme: scheme.name.replace("MLKEM", "test/bin/bench_kyber"),
@@ -411,7 +423,7 @@ def bench(
         force_qemu,
         verbose,
         run_as_root,
-        mac_taskpolicy,
+        exec_wrapper,
         process_make_envs(cflags, arch_flags),
         [f"CYCLES={cycles}"],
     )
@@ -430,10 +442,11 @@ def bench(
                 # encaps cycles=X
                 # decaps cycles=X
 
-                d = {
-                    k: int(v)
-                    for k, v in (l.decode().split("=") for l in r.splitlines()[:3])
-                }
+                lines = [
+                    line.decode() for line in r.splitlines() if "=" in line.decode()
+                ]
+
+                d = {k: int(v) for k, v in (l.split("=") for l in lines)}
                 for primitive in ["keypair", "encaps", "decaps"]:
                     v.append(
                         {