From 530a7954acad829367ba1ca457ad3e8bb2bd0487 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sun, 17 Nov 2024 19:15:49 +0000 Subject: [PATCH] CI: Benchmark x86_64-based EC2 instances using PMU instead of PERF Previous benchmarks on x86_64-based EC2 instances indicated some inaccuracy in the performance measurements using the `perf` kernel module. Performance measurements using `rdtsc`, in turn, seem to work more reliably. This commit changes the benchmarking CI for x86_64 instances to use `rdtsc` to obtain the cycle counter, and no longer `perf`. To allow this, the choice between PMU and PERF first needs to be made an additional parameter to the EC2 benchmarking workflow. Signed-off-by: Hanno Becker --- .github/workflows/bench.yml | 8 ++++++++ .github/workflows/bench_ec2_reusable.yml | 8 ++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index c83f3cf15..407bb4afd 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -72,36 +72,43 @@ jobs: ec2_ami: ubuntu-latest (aarch64) archflags: -mcpu=cortex-a76 -march=armv8.2-a cflags: -DFORCE_AARCH64 + perf: PERF - name: Graviton3 ec2_instance_type: c7g.medium ec2_ami: ubuntu-latest (aarch64) archflags: -march=armv8.4-a+sha3 cflags: -DFORCE_AARCH64 + perf: PERF - name: Graviton4 ec2_instance_type: c8g.medium ec2_ami: ubuntu-latest (aarch64) archflags: -march=armv9-a+sha3 cflags: -DFORCE_AARCH64 + perf: PERF - name: AMD EPYC 4th gen (c7a) ec2_instance_type: c7a.medium ec2_ami: ubuntu-latest (x86_64) archflags: -mavx2 -mbmi2 -mpopcnt -maes cflags: -DFORCE_X86_64 + perf: PMU - name: Intel Xeon 4th gen (c7i) ec2_instance_type: c7i.large ec2_ami: ubuntu-latest (x86_64) archflags: -mavx2 -mbmi2 -mpopcnt -maes cflags: -DFORCE_X86_64 + perf: PMU - name: AMD EPYC 3rd gen (c6a) ec2_instance_type: c7a.medium ec2_ami: ubuntu-latest (x86_64) archflags: -mavx2 -mbmi2 -mpopcnt -maes cflags: -DFORCE_X86_64 + perf: PMU - name: Intel Xeon 3rd
gen (c6i) ec2_instance_type: c7i.large ec2_ami: ubuntu-latest (x86_64) archflags: -mavx2 -mbmi2 -mpopcnt -maes cflags: -DFORCE_X86_64 + perf: PMU uses: ./.github/workflows/bench_ec2_reusable.yml if: github.repository_owner == 'pq-code-package' && (github.event.label.name == 'benchmark' || github.ref == 'refs/heads/main') with: @@ -112,4 +119,5 @@ jobs: opt: ${{ matrix.opt.value }} store_results: ${{ github.repository_owner == 'pq-code-package' && github.ref == 'refs/heads/main' }} # Only store optimized results name: "${{ matrix.target.name }}${{ (!matrix.opt.value && ' (no-opt)') || ''}}" + perf: ${{ matrix.target.perf }} secrets: inherit diff --git a/.github/workflows/bench_ec2_reusable.yml b/.github/workflows/bench_ec2_reusable.yml index e44b783a7..b822ab40a 100644 --- a/.github/workflows/bench_ec2_reusable.yml +++ b/.github/workflows/bench_ec2_reusable.yml @@ -34,6 +34,10 @@ on: type: boolean description: Runs with optimized code if enabled. default: true + perf: + type: string + description: Method by which clock cycles should be measured (PMU | PERF) + default: PERF store_results: type: boolean description: Indicates if results should be pushed to github pages @@ -121,7 +125,7 @@ jobs: cflags: ${{ inputs.cflags }} archflags: ${{ inputs.archflags }} opt: ${{ inputs.opt }} - perf: PERF + perf: ${{ inputs.perf }} store_results: ${{ inputs.store_results }} bench_extra_args: ${{ inputs.bench_extra_args }} gh_token: ${{ secrets.AWS_GITHUB_TOKEN }} @@ -148,7 +152,7 @@ jobs: cflags: ${{ inputs.cflags }} archflags: ${{ inputs.archflags }} opt: ${{ inputs.opt }} - perf: PERF + perf: ${{ inputs.perf }} store_results: ${{ inputs.store_results }} bench_extra_args: ${{ inputs.bench_extra_args }} gh_token: ${{ secrets.AWS_GITHUB_TOKEN }}