Skip to content

Commit

Permalink
Merge pull request #421 from pq-code-package/bench_ec2_pmu
Browse files Browse the repository at this point in the history
Bench: Use PMU instead of PERF for x86_64 benchmarking
  • Loading branch information
mkannwischer authored Nov 18, 2024
2 parents 383f1a4 + 530a795 commit d30bc18
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,36 +72,43 @@ jobs:
ec2_ami: ubuntu-latest (aarch64)
archflags: -mcpu=cortex-a76 -march=armv8.2-a
cflags: -DFORCE_AARCH64
perf: PERF
- name: Graviton3
ec2_instance_type: c7g.medium
ec2_ami: ubuntu-latest (aarch64)
archflags: -march=armv8.4-a+sha3
cflags: -DFORCE_AARCH64
perf: PERF
- name: Graviton4
ec2_instance_type: c8g.medium
ec2_ami: ubuntu-latest (aarch64)
archflags: -march=armv9-a+sha3
cflags: -DFORCE_AARCH64
perf: PERF
- name: AMD EPYC 4th gen (c7a)
ec2_instance_type: c7a.medium
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes
cflags: -DFORCE_X86_64
perf: PMU
- name: Intel Xeon 4th gen (c7i)
ec2_instance_type: c7i.large
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes
cflags: -DFORCE_X86_64
perf: PMU
# 3rd-gen AMD EPYC is the EC2 c6a family (c7a is the 4th-gen entry).
- name: AMD EPYC 3rd gen (c6a)
ec2_instance_type: c6a.large
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes
cflags: -DFORCE_X86_64
perf: PMU
# 3rd-gen Intel Xeon is the EC2 c6i family (c7i is the 4th-gen entry).
- name: Intel Xeon 3rd gen (c6i)
ec2_instance_type: c6i.large
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes
cflags: -DFORCE_X86_64
perf: PMU
uses: ./.github/workflows/bench_ec2_reusable.yml
if: github.repository_owner == 'pq-code-package' && (github.event.label.name == 'benchmark' || github.ref == 'refs/heads/main')
with:
Expand All @@ -112,4 +119,5 @@ jobs:
opt: ${{ matrix.opt.value }}
store_results: ${{ github.repository_owner == 'pq-code-package' && github.ref == 'refs/heads/main' }} # Only store optimized results
name: "${{ matrix.target.name }}${{ (!matrix.opt.value && ' (no-opt)') || ''}}"
perf: ${{ matrix.target.perf }}
secrets: inherit
8 changes: 6 additions & 2 deletions .github/workflows/bench_ec2_reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ on:
type: boolean
description: Runs with optimized code if enabled.
default: true
perf:
type: string
description: Method by which clock cycles should be measured (PMU | PERF)
default: PERF
store_results:
type: boolean
description: Indicates if results should be pushed to github pages
Expand Down Expand Up @@ -121,7 +125,7 @@ jobs:
cflags: ${{ inputs.cflags }}
archflags: ${{ inputs.archflags }}
opt: ${{ inputs.opt }}
perf: PERF
perf: ${{ inputs.perf }}
store_results: ${{ inputs.store_results }}
bench_extra_args: ${{ inputs.bench_extra_args }}
gh_token: ${{ secrets.AWS_GITHUB_TOKEN }}
Expand All @@ -148,7 +152,7 @@ jobs:
cflags: ${{ inputs.cflags }}
archflags: ${{ inputs.archflags }}
opt: ${{ inputs.opt }}
perf: PERF
perf: ${{ inputs.perf }}
store_results: ${{ inputs.store_results }}
bench_extra_args: ${{ inputs.bench_extra_args }}
gh_token: ${{ secrets.AWS_GITHUB_TOKEN }}
Expand Down
23 changes: 23 additions & 0 deletions test/hal/hal.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,25 @@

#if defined(PMU_CYCLES)

#if defined(__x86_64__)

/*
 * Enable the cycle counter (x86_64 variant of the PMU_CYCLES backend).
 *
 * Intentionally a no-op: this variant performs no setup. The x86_64
 * get_cyclecounter() reads the time-stamp counter directly via rdtsc.
 */
void enable_cyclecounter(void)
{
  return;
}

/*
 * Disable the cycle counter (x86_64 variant of the PMU_CYCLES backend).
 *
 * Intentionally a no-op: the matching x86_64 enable_cyclecounter() sets
 * nothing up, so there is nothing to tear down here.
 */
void disable_cyclecounter(void)
{
  return;
}

/*
 * Read the x86_64 time-stamp counter.
 *
 * rdtsc delivers the counter split across two registers: EDX holds the
 * high 32 bits and EAX the low 32 bits. The halves are recombined into a
 * single 64-bit value.
 *
 * NOTE(review): the TSC commonly ticks at a fixed reference rate rather
 * than the current core clock -- confirm this is acceptable wherever the
 * result is reported as "cycles".
 *
 * Returns: the current 64-bit counter value.
 */
uint64_t get_cyclecounter(void) {
  uint32_t lo;
  uint32_t hi;

  /* volatile: every call must issue its own rdtsc; never merge or drop. */
  __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));

  return ((uint64_t)hi << 32) | (uint64_t)lo;
}

#elif defined(__AARCH64EL__) || defined(_M_ARM64)

void enable_cyclecounter(void) {
uint64_t tmp;
__asm __volatile(
Expand Down Expand Up @@ -61,6 +80,10 @@ uint64_t get_cyclecounter(void) {
return retval;
}

#else
#error PMU_CYCLES option only supported on x86_64 and AArch64
#endif

#elif defined(PERF_CYCLES)

#include <asm/unistd.h>
Expand Down

0 comments on commit d30bc18

Please sign in to comment.