-
Notifications
You must be signed in to change notification settings - Fork 269
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Run performance comparison in CI using Kani's Benchcomp
Use proof harnesses in AWS C Common as an (initial) benchmark to guard against performance regressions introduced in PRs. See the "Summary" behind any of the executions of https://github.com/model-checking/kani/actions/workflows/bench.yml for examples of what the output will look like. Co-authored-by: Kareem Khazem <[email protected]>
- Loading branch information
1 parent
270e82c
commit d618c6b
Showing
3 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# Compare CBMC performance of selected benchmarks across two versions of CBMC,
# erroring out on regression. This config file is to be used together with the
# set-up in performance.yaml, because it assumes that there are two CBMC
# checkouts in the 'old' and 'new' directories; benchcomp compares the
# performance of these two checkouts.
#
# This configuration file is based on Benchcomp's perf-regression.yaml that
# ships with Kani.

# Two variants of the same benchmark suite: each runs the AWS C Common CBMC
# proofs, differing only in which CBMC build ('old' or 'new') is put first on
# the PATH.
variants:
  aws-c-common@old:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/old/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}
  aws-c-common@new:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/new/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}

run:
  suites:
    aws-c-common:
      # benchcomp-parse_cbmc.py converts the Litani run.json produced by
      # run-cbmc-proofs.py into benchcomp's metrics format.
      parser:
        command: /home/runner/work/cbmc/cbmc/new/.github/workflows/benchcomp-parse_cbmc.py
      variants:
        - aws-c-common@old
        - aws-c-common@new

visualize:
  # Raw results on stdout ('-') for debugging.
  - type: dump_yaml
    out_file: '-'

  # Markdown comparison table on stdout; the workflow appends it to the
  # GitHub job summary. The 'text' entries below are Python lambdas that
  # benchcomp evaluates per benchmark, where b maps variant name to the
  # metric's value for that variant.
  - type: dump_markdown_results_table
    out_file: '-'
    extra_columns:

      # For these two metrics, display the difference between old and new and
      # embolden if the absolute difference is more than 10% of the old value
      number_vccs:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
      number_program_steps:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
      # For 'runtime' metrics, display the % change from old to new, emboldening
      # cells whose absolute change is >50%
      solver_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      verification_time:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      symex_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      # For success metric, display some text if success has changed
      success:
        - column_name: change
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else "❌ newly failing" if b["aws-c-common@old"]
            else "✅ newly passing"
  # Fail the workflow when any benchmark regressed between old and new.
  - type: error_on_regression
    variant_pairs: [[aws-c-common@old, aws-c-common@new]]
    checks:
      - metric: success
        # Compare the old and new variants of each benchmark. The
        # benchmark has regressed if the lambda returns true.
        test: "lambda old, new: False if not old else not new"
      # Runtimes under 10 (seconds, presumably — confirm against the parser's
      # units) are never flagged; above that, a >1.5x slowdown is a regression.
      - metric: solver_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
      - metric: symex_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright Kani Contributors | ||
# SPDX-License-Identifier: Apache-2.0 OR MIT | ||
|
||
|
||
import json | ||
import logging | ||
import os | ||
import pathlib | ||
import re | ||
import sys | ||
import textwrap | ||
|
||
import yaml | ||
|
||
|
||
def get_description():
    """Return a short, human-readable description of this parser."""
    return "Read CBMC statistics from a Litani run.json file.\n"
|
||
|
||
def _get_metrics(): | ||
return { | ||
"solver_runtime": { | ||
"pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"), | ||
"parse": float, | ||
}, | ||
"removed_program_steps": { | ||
"pat": re.compile(r"slicing removed (?P<value>\d+) assignments"), | ||
"parse": int, | ||
}, | ||
"number_program_steps": { | ||
"pat": re.compile(r"size of program expression: (?P<value>\d+) steps"), | ||
"parse": int, | ||
}, | ||
"number_vccs": { | ||
"pat": re.compile( | ||
r"Generated \d+ VCC\(s\), (?P<value>\d+) remaining after simplification"), | ||
"parse": int, | ||
}, | ||
"symex_runtime": { | ||
"pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"), | ||
"parse": float, | ||
}, | ||
"success": { | ||
"pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"), | ||
"parse": lambda v: v == "SUCCESSFUL", | ||
}, | ||
} | ||
|
||
|
||
def get_metrics():
    """Return the metric names this parser reports, each mapped to an
    empty settings dict.

    "removed_program_steps" is deliberately excluded: it is scraped only
    so that the number_program_steps figure can be adjusted, and is never
    reported as a metric in its own right.
    """
    return {
        name: {}
        for name in _get_metrics()
        if name != "removed_program_steps"
    }
|
||
|
||
def get_run(root_dir):
    """Load and return the first run.json found anywhere under root_dir.

    Logs an error and exits the process if no run.json exists.
    """
    run_file = next(pathlib.Path(root_dir).rglob("run.json"), None)
    if run_file is None:
        logging.error("No run.json found in %s", root_dir)
        sys.exit(1)
    with open(run_file) as handle:
        return json.load(handle)
|
||
|
||
def main(root_dir):
    """Scrape per-benchmark CBMC statistics from a Litani run under root_dir.

    Finds the run.json below root_dir, walks its pipelines, and for every
    completed plain-`cbmc` job in a "test" stage extracts the metrics
    described by _get_metrics() from the job's captured stdout.

    Returns a dict with "metrics" (the reportable metric names) and
    "benchmarks" (metric values keyed by "<project>::<pipeline>").
    """
    root_dir = pathlib.Path(root_dir)
    run = get_run(root_dir)
    metrics = _get_metrics()
    benchmarks = {}
    for pipe in run["pipelines"]:
        for stage in pipe["ci_stages"]:
            # Only jobs in the "test" stage are measured.
            if stage["name"] != "test":
                continue
            for job in stage["jobs"]:
                # Keep only plain `cbmc` invocations; skip coverage and
                # property-listing runs.
                if not job["wrapper_arguments"]["command"].startswith("cbmc "):
                    continue
                if "cover" in job["wrapper_arguments"]["command"]:
                    continue
                if "--show-properties" in job["wrapper_arguments"]["command"]:
                    continue

                bench_name = f"{run['project']}::{pipe['name']}"
                # Incomplete jobs have no trustworthy output; warn and skip.
                if not job["complete"]:
                    logging.warning(
                        "Found an incomplete CBMC benchmark '%s'",
                        bench_name)
                    continue

                # Seed the entry with metrics taken straight from the job
                # record rather than from its stdout.
                benchmarks[bench_name] = {
                    "metrics": {
                        "success": job["outcome"] == "success",
                        "verification_time": job["duration_ms"],
                    }}

                # Scan stdout line by line; stop at the first metric whose
                # pattern matches the line (the `break` below).
                for line in job["stdout"]:
                    for metric, metric_info in metrics.items():
                        m = metric_info["pat"].search(line)
                        if not m:
                            continue

                        parse = metric_info["parse"]
                        try:
                            # CBMC prints out some metrics more than once, e.g.
                            # "Solver" and "decision procedure". Add those
                            # values together
                            benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
                        except (KeyError, TypeError):
                            # First occurrence of this metric (KeyError) or a
                            # value that does not support +=: just store it.
                            benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
                        break

    # Report program steps net of the assignments that slicing removed; the
    # helper metric is dropped afterwards. Benchmarks missing either figure
    # are left untouched (KeyError).
    for bench_name, bench_info in benchmarks.items():
        try:
            n_steps = bench_info["metrics"]["number_program_steps"]
            rm_steps = bench_info["metrics"]["removed_program_steps"]
            bench_info["metrics"]["number_program_steps"] = n_steps - rm_steps
            bench_info["metrics"].pop("removed_program_steps", None)
        except KeyError:
            pass

    return {
        "metrics": get_metrics(),
        "benchmarks": benchmarks,
    }
|
||
|
||
if __name__ == "__main__":
    # Parse the Litani run found under the current working directory and
    # emit the resulting benchmark statistics as YAML on stdout.
    result = main(os.getcwd())
    print(yaml.dump(result, default_flow_style=False))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# Run performance benchmarks comparing two different CBMC versions.
#
# Checks out and builds CBMC at two commits ('old' and 'new'), runs the
# AWS C Common CBMC proofs against each build via Kani's benchcomp, and
# appends a comparison table to the workflow summary.
name: Performance Benchmarking
on:
  push:
    branches: [ develop ]
  pull_request:
    branches: [ develop ]

jobs:
  perf-benchcomp:
    runs-on: ubuntu-20.04
    steps:
      # For pushes, compare the pushed commit (after) against its
      # predecessor (before); for pull requests, compare the PR head
      # against its base.
      - name: Save push event HEAD and HEAD~ to environment variables
        if: ${{ github.event_name == 'push' }}
        run: |
          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
      - name: Save pull request HEAD and base to environment variables
        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
        run: |
          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
      - name: Check out CBMC (old variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./old
          ref: ${{ env.OLD_REF }}
          # NOTE(review): depth 2 presumably so the parent of OLD_REF is
          # also available — confirm whether this is still required.
          fetch-depth: 2

      - name: Check out CBMC (new variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./new
          ref: ${{ env.NEW_REF }}
          fetch-depth: 1

      - name: Fetch dependencies
        env:
          # This is needed in addition to -yq to prevent apt-get from asking for
          # user input
          DEBIAN_FRONTEND: noninteractive
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends -yq cmake ninja-build gcc g++ maven flex bison ccache
      - name: Prepare ccache
        uses: actions/cache/restore@v4
        with:
          path: .ccache
          key: ${{ runner.os }}-20.04-Release-${{ github.ref }}-${{ github.sha }}-PR
          restore-keys: |
            ${{ runner.os }}-20.04-Release-${{ github.ref }}
            ${{ runner.os }}-20.04-Release
      - name: ccache environment
        run: echo "CCACHE_DIR=$PWD/.ccache" >> $GITHUB_ENV
      - name: Zero ccache stats and limit in size
        run: ccache -z --max-size=500M

      # Both checkouts are configured and built with identical Release
      # settings so that the performance comparison is like-for-like.
      - name: Configure using CMake (new variant)
        run: cmake -S new/ -B new/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (new variant)
        run: echo "CCACHE_BASEDIR=$PWD/new" >> $GITHUB_ENV
      - name: Build with Ninja (new variant)
        run: ninja -C new/build -j2
      - name: Print ccache stats (new variant)
        run: ccache -s

      - name: Configure using CMake (old variant)
        run: cmake -S old/ -B old/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (old variant)
        run: echo "CCACHE_BASEDIR=$PWD/old" >> $GITHUB_ENV
      - name: Build with Ninja (old variant)
        run: ninja -C old/build -j2
      - name: Print ccache stats (old variant)
        run: ccache -s

      # benchcomp ships inside the Kani repository; a shallow clone is enough.
      - name: Obtain benchcomp
        run: git clone --depth 1 https://github.com/model-checking/kani kani.git

      # The benchmark suite is AWS C Common's CBMC proofs, driven by Litani
      # (installed from its release .deb below).
      - name: Fetch benchmarks
        run: |
          git clone --depth 1 https://github.com/awslabs/aws-c-common aws-c-common.git
          sudo apt-get install --no-install-recommends -yq gnuplot graphviz
          curl -L --remote-name \
            https://github.com/awslabs/aws-build-accumulator/releases/download/1.29.0/litani-1.29.0.deb
          sudo dpkg -i litani-1.29.0.deb
      # 'run' executes both variants; 'collate' merges their results.
      - name: Run benchcomp
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            run
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            collate
      # Publish the markdown comparison table on the workflow summary page.
      - name: Perf Regression Results Table
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"
      - name: Run other visualizations
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --except dump_markdown_results_table