Skip to content

Commit

Permalink
Add PMU+PERF benchmarking code (#68)
Browse files Browse the repository at this point in the history
* add benchmarks using PMU cycle counter

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* add benchmarking script

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* fix warnings on MacOS

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* add PERF cycle counting as well

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* format

Signed-off-by: Matthias J. Kannwischer <[email protected]>

* only print output for benchmarks

Signed-off-by: Matthias J. Kannwischer <[email protected]>

---------

Signed-off-by: Matthias J. Kannwischer <[email protected]>
  • Loading branch information
mkannwischer authored Jun 20, 2024
1 parent 4c2e281 commit 71cc3a3
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 6 deletions.
35 changes: 34 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ ifeq ($(HOST_PLATFORM),Linux-x86_64)
CFLAGS += -static
endif

# Cycle-counting backend used by the benchmark binaries. Override on the
# command line, e.g. `make CYCLES=PMU bench`.
#   PMU  -> adds -DPMU_CYCLES to CFLAGS
#   PERF -> adds -DPERF_CYCLES to CFLAGS
# Any other value (including the default NO) adds no define.
CYCLES ?= NO

ifeq ($(CYCLES),PMU)
CFLAGS += -DPMU_CYCLES
endif

ifeq ($(CYCLES),PERF)
CFLAGS += -DPERF_CYCLES
endif

CFLAGS_RANDOMBYTES = ${CFLAGS} ${INCLUDE_RANDOM}
CFLAGS_NISTRANDOMBYTES = ${CFLAGS} ${INCLUDE_NISTRANDOM}
NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer
Expand All @@ -27,21 +37,29 @@ SOURCES = mlkem/kem.c mlkem/indcpa.c mlkem/polyvec.c mlkem/poly.c mlkem/ntt.c ml
SOURCESKECCAK = $(SOURCES) fips202/keccakf1600.c fips202/fips202.c mlkem/symmetric-shake.c
SOURCESKECCAKRANDOM = $(SOURCESKECCAK) randombytes/randombytes.c
SOURCESNISTKATS = $(SOURCESKECCAK) test/nistrng/aes.c test/nistrng/rng.c
SOURCESBENCH = $(SOURCESKECCAKRANDOM) test/hal.c

HEADERS = mlkem/params.h mlkem/kem.h mlkem/indcpa.h mlkem/polyvec.h mlkem/poly.h mlkem/ntt.h mlkem/cbd.h mlkem/reduce.h mlkem/verify.h mlkem/symmetric.h
HEADERSKECCAK = $(HEADERS) fips202/keccakf1600.h fips202/fips202.h
HEADERSKECCAKRANDOM = $(HEADERSKECCAK) randombytes/randombytes.h
HEADERNISTKATS = $(HEADERSKECCAK) test/nistrng/aes.h test/nistrng/randombytes.h
HEADERSBENCH = $(HEADERSKECCAKRANDOM) test/hal.h


# Targets that do not name real files. `bench` must be listed here as well,
# otherwise a file or directory called `bench` would make `make bench` a no-op.
.PHONY: all mlkem bench kat nistkat clean

all: mlkem kat nistkat
all: mlkem bench kat nistkat

mlkem: \
test/bin/test_kyber512 \
test/bin/test_kyber768 \
test/bin/test_kyber1024

bench: \
test/bin/bench_kyber512 \
test/bin/bench_kyber768 \
test/bin/bench_kyber1024

nistkat: \
test/bin/gen_NISTKAT512 \
test/bin/gen_NISTKAT768 \
Expand All @@ -67,6 +85,21 @@ test/bin/test_kyber1024: test/test_kyber.c $(SOURCESKECCAKRANDOM) $(HEADERSKECCA
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
$(CC) $(CFLAGS_RANDOMBYTES) -DKYBER_K=4 $(SOURCESKECCAKRANDOM) $< -o $@

# Benchmark binaries: one per parameter set. All three are built from
# test/bench_kyber.c plus $(SOURCESBENCH) and differ only in the -DKYBER_K
# value passed to the compiler (2, 3, 4 for the 512, 768, 1024 binaries).
test/bin/bench_kyber512: test/bench_kyber.c $(SOURCESBENCH) $(HEADERSBENCH)
$(Q)echo " CC $@"
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
$(CC) $(CFLAGS_RANDOMBYTES) -DKYBER_K=2 $(SOURCESBENCH) $< -o $@

test/bin/bench_kyber768: test/bench_kyber.c $(SOURCESBENCH) $(HEADERSBENCH)
$(Q)echo " CC $@"
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
$(CC) $(CFLAGS_RANDOMBYTES) -DKYBER_K=3 $(SOURCESBENCH) $< -o $@

test/bin/bench_kyber1024: test/bench_kyber.c $(SOURCESBENCH) $(HEADERSBENCH)
$(Q)echo " CC $@"
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
$(CC) $(CFLAGS_RANDOMBYTES) -DKYBER_K=4 $(SOURCESBENCH) $< -o $@

test/bin/gen_KAT512: test/gen_KAT.c $(SOURCESKECCAKRANDOM) $(HEADERSKECCAKRANDOM)
$(Q)echo " CC $@"
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
Expand Down
53 changes: 48 additions & 5 deletions scripts/tests
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,16 @@ def sha256sum(result):
return m.hexdigest()


def base_run(bin, force_qemu, verbose):
def base_run(bin, force_qemu, verbose, cycles="NO"):
if force_qemu or (platform.system() == "Linux" and platform.machine() == "x86_64"):
logging.debug(f"Emulating {bin} with QEMU")

args = ["make", "CROSS_PREFIX=aarch64-none-linux-gnu-", f"{bin}"]
args = [
"make",
"CROSS_PREFIX=aarch64-none-linux-gnu-",
f"CYCLES={cycles}",
f"{bin}",
]
logging.info(" ".join(args))

p = subprocess.run(
Expand All @@ -49,7 +54,7 @@ def base_run(bin, force_qemu, verbose):
else:
logging.debug(f"Running {bin} natively")

args = ["make", f"{bin}"]
args = ["make", f"CYCLES={cycles}", f"{bin}"]
logging.info(" ".join(args))

p = subprocess.run(
Expand Down Expand Up @@ -97,7 +102,9 @@ def parse_meta(scheme, field):
return result.stdout.strip()


def test_schemes(title, scheme2file, actual_proc, expect_proc, force_qemu, verbose):
def test_schemes(
title, scheme2file, actual_proc, expect_proc, force_qemu, verbose, cycles="NO"
):
logging.info(f"{title}")

summary_file = os.environ.get("GITHUB_STEP_SUMMARY")
Expand All @@ -117,9 +124,11 @@ def test_schemes(title, scheme2file, actual_proc, expect_proc, force_qemu, verbo
return (fail, summary)

fail = False
results = {}
for scheme in SCHEME:
bin = scheme2file(scheme)
result = base_run(bin, force_qemu, verbose)
result = base_run(bin, force_qemu, verbose, cycles)
results[scheme] = result

actual = actual_proc(result)
expect = expect_proc(scheme)
Expand All @@ -135,6 +144,8 @@ def test_schemes(title, scheme2file, actual_proc, expect_proc, force_qemu, verbo
if fail:
sys.exit(1)

return results


def validate_force_qemu(ctx, _, v):
if platform.system() == "Darwin" and v:
Expand Down Expand Up @@ -254,6 +265,37 @@ def kat(force_qemu, verbose):
)


@click.command(
    short_help="Run the benchmarks for all parameter sets",
    context_settings={"show_default": True},
)
@add_options(_shared_options)
@click.option(
    "-c",
    "--cycles",
    nargs=1,
    type=click.Choice(["NO", "PMU", "PERF"]),
    show_default=True,
    default="NO",
    help="Method for counting clock cycles. PMU requires (user-space) access to the Arm Performance Monitor Unit (PMU). PERF requires a kernel with perf support.",
)
def bench(force_qemu, verbose, cycles):
    # Build and run the benchmark binary of every scheme, then print each
    # binary's raw output. (Kept as comments on purpose: click would pick up a
    # docstring as the command's help text.)
    config_logger(verbose)

    def bench_binary(scheme):
        # Map a scheme to the path of its benchmark binary.
        return scheme.name.replace("MLKEM", "test/bin/bench_kyber")

    def always_true(_):
        # Benchmarks have no expected output: actual and expected are both
        # True, so the comparison in test_schemes never fails.
        return True

    outputs = test_schemes(
        "benchmark",
        bench_binary,
        always_true,
        always_true,
        force_qemu,
        verbose,
        cycles=cycles,
    )

    for scheme, raw_output in outputs.items():
        print(scheme)
        print(raw_output.decode())


@click.group(invoke_without_command=True)
def cli():
pass
Expand All @@ -263,6 +305,7 @@ cli.add_command(run)
cli.add_command(func)
cli.add_command(nistkat)
cli.add_command(kat)
cli.add_command(bench)

if __name__ == "__main__":
cli()
111 changes: 111 additions & 0 deletions test/bench_kyber.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// SPDX-License-Identifier: Apache-2.0
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "kem.h"
#include "hal.h"
#include "randombytes.h"

#define NWARMUP 50
#define NITERERATIONS 300
#define NTESTS 200

// qsort() comparator that orders two uint64_t values ascending.
//
// a, b: pointers to the uint64_t elements being compared.
// Returns -1 if *a < *b, 0 if they are equal, and 1 if *a > *b.
static int cmp_uint64_t(const void *a, const void *b)
{
  const uint64_t lhs = *(const uint64_t *)a;
  const uint64_t rhs = *(const uint64_t *)b;

  // Compare rather than subtract: narrowing a 64-bit difference to int drops
  // the upper bits (a difference of 2^32 would look like "equal") and can
  // flip the sign, which breaks the ordering qsort() relies on.
  return (lhs > rhs) - (lhs < rhs);
}

// Benchmark key generation, encapsulation and decapsulation.
//
// NTESTS samples are collected per operation. For each sample the operation
// is first run NWARMUP times untimed, then NITERERATIONS back-to-back calls
// are bracketed by two get_cyclecounter() reads and the counter difference is
// stored. After sorting, the middle sample divided by NITERERATIONS is
// printed for each operation.
//
// Returns 0 on success, or 1 (after printing "ERROR keys") as soon as the
// encapsulated and decapsulated shared secrets differ.
static int bench(void)
{
  uint8_t pk[CRYPTO_PUBLICKEYBYTES];
  uint8_t sk[CRYPTO_SECRETKEYBYTES];
  uint8_t ct[CRYPTO_CIPHERTEXTBYTES];
  uint8_t ss_enc[CRYPTO_BYTES];
  uint8_t ss_dec[CRYPTO_BYTES];
  unsigned char kg_rand[2 * CRYPTO_BYTES];
  unsigned char enc_rand[CRYPTO_BYTES];
  uint64_t cycles_kg[NTESTS];
  uint64_t cycles_enc[NTESTS];
  uint64_t cycles_dec[NTESTS];

  unsigned int test, iter;
  uint64_t start, stop;

  for (test = 0; test < NTESTS; test++)
  {
    // Fresh randomness per sample; the same coins are reused for every
    // warm-up and timed call within this sample.
    randombytes(kg_rand, 2 * CRYPTO_BYTES);
    randombytes(enc_rand, CRYPTO_BYTES);

    // Key-pair generation: warm-up, then timed batch
    for (iter = 0; iter < NWARMUP; iter++)
    {
      crypto_kem_keypair_derand(pk, sk, kg_rand);
    }
    start = get_cyclecounter();
    for (iter = 0; iter < NITERERATIONS; iter++)
    {
      crypto_kem_keypair_derand(pk, sk, kg_rand);
    }
    stop = get_cyclecounter();
    cycles_kg[test] = stop - start;

    // Encapsulation: warm-up, then timed batch
    for (iter = 0; iter < NWARMUP; iter++)
    {
      crypto_kem_enc_derand(ct, ss_enc, pk, enc_rand);
    }
    start = get_cyclecounter();
    for (iter = 0; iter < NITERERATIONS; iter++)
    {
      crypto_kem_enc_derand(ct, ss_enc, pk, enc_rand);
    }
    stop = get_cyclecounter();
    cycles_enc[test] = stop - start;

    // Decapsulation: warm-up, then timed batch
    for (iter = 0; iter < NWARMUP; iter++)
    {
      crypto_kem_dec(ss_dec, ct, sk);
    }
    start = get_cyclecounter();
    for (iter = 0; iter < NITERERATIONS; iter++)
    {
      crypto_kem_dec(ss_dec, ct, sk);
    }
    stop = get_cyclecounter();
    cycles_dec[test] = stop - start;

    // Sanity check: both sides must have derived the same shared secret.
    if (memcmp(ss_enc, ss_dec, CRYPTO_BYTES) != 0)
    {
      printf("ERROR keys\n");
      return 1;
    }
  }

  // Sort the samples so the middle element can be reported.
  qsort(cycles_kg, NTESTS, sizeof(cycles_kg[0]), cmp_uint64_t);
  qsort(cycles_enc, NTESTS, sizeof(cycles_enc[0]), cmp_uint64_t);
  qsort(cycles_dec, NTESTS, sizeof(cycles_dec[0]), cmp_uint64_t);

  // Each sample covers NITERERATIONS calls; divide to get per-call cycles.
  printf("keypair cycles=%" PRIu64 "\n", cycles_kg[NTESTS / 2] / NITERERATIONS);
  printf("encaps cycles=%" PRIu64 "\n", cycles_enc[NTESTS / 2] / NITERERATIONS);
  printf("decaps cycles=%" PRIu64 "\n", cycles_dec[NTESTS / 2] / NITERERATIONS);

  return 0;
}

// Entry point: run the benchmark with the cycle counter enabled, then print
// the key and ciphertext sizes of the selected parameter set.
//
// Returns 0 on success and the non-zero status of bench() when the
// benchmark's shared-secret check failed, so callers that inspect the exit
// code notice the failure.
int main(void)
{
  int ret;

  enable_cyclecounter();
  ret = bench();
  // Always disable the counter again, even when the benchmark failed.
  disable_cyclecounter();

  printf("CRYPTO_SECRETKEYBYTES: %d\n", CRYPTO_SECRETKEYBYTES);
  printf("CRYPTO_PUBLICKEYBYTES: %d\n", CRYPTO_PUBLICKEYBYTES);
  printf("CRYPTO_CIPHERTEXTBYTES: %d\n", CRYPTO_CIPHERTEXTBYTES);

  return ret;
}
Loading

0 comments on commit 71cc3a3

Please sign in to comment.