Run performance comparison in CI using Kani's Benchcomp
Use the proof harnesses in AWS C Common as an (initial) benchmark to guard
against performance regressions introduced in PRs. See the "Summary"
behind any of the executions of
https://github.com/model-checking/kani/actions/workflows/bench.yml for
examples of what the output will look like.

Co-authored-by: Kareem Khazem <[email protected]>
tautschnig and karkhaz committed Feb 6, 2024
1 parent 2d96b83 commit 3ac5219
Showing 3 changed files with 361 additions and 0 deletions.
105 changes: 105 additions & 0 deletions .github/workflows/benchcomp-config.yaml
@@ -0,0 +1,105 @@
# Compare CBMC performance of selected benchmarks across two versions of CBMC,
# erroring out on regression. This config file is to be used together with the
# set-up in performance.yaml, because it assumes that there are two CBMC
# checkouts in the 'old' and 'new' directories; benchcomp compares the
# performance of these two checkouts.
#
# This configuration file is based on Benchcomp's perf-regression.yaml that
# ships with Kani.

variants:
  aws-c-common@old:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/old/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}
  aws-c-common@new:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/new/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}

run:
  suites:
    aws-c-common:
      parser:
        command: /home/runner/work/cbmc/cbmc/new/.github/workflows/benchcomp-parse_cbmc.py
      variants:
        - aws-c-common@old
        - aws-c-common@new

visualize:
  - type: dump_yaml
    out_file: '-'

  - type: dump_markdown_results_table
    out_file: '-'
    extra_columns:

      # For these two metrics, display the difference between old and new and
      # embolden if the absolute difference is more than 10% of the old value
      number_vccs:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
      number_program_steps:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
      # For 'runtime' metrics, display the % change from old to new, emboldening
      # cells whose absolute change is >50%
      solver_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      verification_time:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      symex_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      # For success metric, display some text if success has changed
      success:
        - column_name: change
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else "❌ newly failing" if b["aws-c-common@old"]
            else "✅ newly passing"
  - type: error_on_regression
    variant_pairs: [[aws-c-common@old, aws-c-common@new]]
    checks:
      - metric: success
        # Compare the old and new variants of each benchmark. The
        # benchmark has regressed if the lambda returns true.
        test: "lambda old, new: False if not old else not new"
      - metric: solver_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
      - metric: symex_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
140 changes: 140 additions & 0 deletions .github/workflows/benchcomp-parse_cbmc.py
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
# Copyright Kani Contributors
# SPDX-License-Identifier: Apache-2.0 OR MIT


import json
import logging
import os
import pathlib
import re
import sys
import textwrap

import yaml


def get_description():
    return textwrap.dedent("""\
        Read CBMC statistics from a Litani run.json file.
        """)


def _get_metrics():
    return {
        "solver_runtime": {
            "pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"),
            "parse": float,
        },
        "removed_program_steps": {
            "pat": re.compile(r"slicing removed (?P<value>\d+) assignments"),
            "parse": int,
        },
        "number_program_steps": {
            "pat": re.compile(r"size of program expression: (?P<value>\d+) steps"),
            "parse": int,
        },
        "number_vccs": {
            "pat": re.compile(
                r"Generated \d+ VCC\(s\), (?P<value>\d+) remaining after simplification"),
            "parse": int,
        },
        "symex_runtime": {
            "pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"),
            "parse": float,
        },
        "success": {
            "pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"),
            "parse": lambda v: v == "SUCCESSFUL",
        },
    }


def get_metrics():
    metrics = dict(_get_metrics())
    for _, info in metrics.items():
        for field in ("pat", "parse"):
            info.pop(field)

    # This is not a metric we return; it is used to find the correct value for
    # the number_program_steps metric
    metrics.pop("removed_program_steps", None)

    # We don't parse this directly from the output
    metrics["verification_time"] = {}

    return metrics


def get_run(root_dir):
    for fyle in pathlib.Path(root_dir).rglob("run.json"):
        with open(fyle) as handle:
            return json.load(handle)
    logging.error("No run.json found in %s", root_dir)
    sys.exit(1)


def main(root_dir):
    root_dir = pathlib.Path(root_dir)
    run = get_run(root_dir)
    metrics = _get_metrics()
    benchmarks = {}
    for pipe in run["pipelines"]:
        for stage in pipe["ci_stages"]:
            if stage["name"] != "test":
                continue
            for job in stage["jobs"]:
                if not job["wrapper_arguments"]["command"].startswith("cbmc "):
                    continue
                if "cover" in job["wrapper_arguments"]["command"]:
                    continue
                if "--show-properties" in job["wrapper_arguments"]["command"]:
                    continue

                bench_name = f"{run['project']}::{pipe['name']}"
                if not job["complete"]:
                    logging.warning(
                        "Found an incomplete CBMC benchmark '%s'",
                        bench_name)
                    continue

                benchmarks[bench_name] = {
                    "metrics": {
                        "success": job["outcome"] == "success",
                        "verification_time": float(job["duration_ms"]),
                    }}

                for line in job["stdout"]:
                    for metric, metric_info in metrics.items():
                        m = metric_info["pat"].search(line)
                        if not m:
                            continue

                        parse = metric_info["parse"]
                        try:
                            # CBMC prints out some metrics more than once, e.g.
                            # "Solver" and "decision procedure". Add those
                            # values together
                            benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
                        except (KeyError, TypeError):
                            benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
                        break

    for bench_name, bench_info in benchmarks.items():
        try:
            n_steps = bench_info["metrics"]["number_program_steps"]
            rm_steps = bench_info["metrics"]["removed_program_steps"]
            bench_info["metrics"]["number_program_steps"] = n_steps - rm_steps
            bench_info["metrics"].pop("removed_program_steps", None)
        except KeyError:
            pass

    return {
        "metrics": get_metrics(),
        "benchmarks": benchmarks,
    }


if __name__ == "__main__":
    result = main(os.getcwd())
    print(yaml.dump(result, default_flow_style=False))
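
To illustrate how the regex table in _get_metrics() drives the parsing in main(), here is a minimal sketch. It assumes _get_metrics() from the script above is in scope, and the sample log lines are fabricated to match the patterns rather than copied from a real CBMC run:

# Illustrative sketch: feed a few fabricated log lines (formatted to match the
# regexes above) through the same match-and-accumulate logic as main().
sample_stdout = [
    "size of program expression: 2500 steps",
    "slicing removed 400 assignments",
    "Generated 120 VCC(s), 85 remaining after simplification",
    "Runtime Symex: 0.42s",
    "Runtime Solver: 1.7s",
    "Runtime Solver: 0.3s",   # repeated metric; values get summed, as in main()
    "VERIFICATION:- SUCCESSFUL",
]

metric_table = _get_metrics()  # assumes the function defined above is importable or in scope
parsed = {}
for line in sample_stdout:
    for metric, info in metric_table.items():
        m = info["pat"].search(line)
        if not m:
            continue
        value = info["parse"](m["value"])
        if metric in parsed:
            parsed[metric] += value  # accumulate repeats, mirroring the try/except in main()
        else:
            parsed[metric] = value
        break

# main() would additionally subtract removed_program_steps from number_program_steps.
print(parsed)  # e.g. solver_runtime == 2.0, number_vccs == 85, success == True
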
116 changes: 116 additions & 0 deletions .github/workflows/performance.yaml
@@ -0,0 +1,116 @@
# Run performance benchmarks comparing two different CBMC versions.
name: Performance Benchmarking
on:
  push:
    branches: [ develop ]
  pull_request:
    branches: [ develop ]

jobs:
  perf-benchcomp:
    runs-on: ubuntu-20.04
    steps:
      - name: Save push event HEAD and HEAD~ to environment variables
        if: ${{ github.event_name == 'push' }}
        run: |
          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
      - name: Save pull request HEAD and base to environment variables
        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
        run: |
          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
      - name: Check out CBMC (old variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./old
          ref: ${{ env.OLD_REF }}
          fetch-depth: 2

      - name: Check out CBMC (new variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./new
          ref: ${{ env.NEW_REF }}
          fetch-depth: 1

      - name: Fetch dependencies
        env:
          # This is needed in addition to -yq to prevent apt-get from asking for
          # user input
          DEBIAN_FRONTEND: noninteractive
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends -yq cmake ninja-build gcc g++ maven flex bison ccache
      - name: Prepare ccache
        uses: actions/cache/restore@v4
        with:
          path: .ccache
          key: ${{ runner.os }}-20.04-Release-${{ github.ref }}-${{ github.sha }}-PR
          restore-keys: |
            ${{ runner.os }}-20.04-Release-${{ github.ref }}
            ${{ runner.os }}-20.04-Release
      - name: ccache environment
        run: echo "CCACHE_DIR=$PWD/.ccache" >> $GITHUB_ENV
      - name: Zero ccache stats and limit in size
        run: ccache -z --max-size=500M

      - name: Configure using CMake (new variant)
        run: cmake -S new/ -B new/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (new variant)
        run: echo "CCACHE_BASEDIR=$PWD/new" >> $GITHUB_ENV
      - name: Build with Ninja (new variant)
        run: ninja -C new/build -j2
      - name: Print ccache stats (new variant)
        run: ccache -s

      - name: Configure using CMake (old variant)
        run: cmake -S old/ -B old/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (old variant)
        run: echo "CCACHE_BASEDIR=$PWD/old" >> $GITHUB_ENV
      - name: Build with Ninja (old variant)
        run: ninja -C old/build -j2
      - name: Print ccache stats (old variant)
        run: ccache -s

      - name: Obtain benchcomp
        run: git clone --depth 1 https://github.com/model-checking/kani kani.git

      - name: Fetch benchmarks
        run: |
          git clone --depth 1 https://github.com/awslabs/aws-c-common aws-c-common.git
          pushd aws-c-common.git/verification/cbmc/proofs/
          # keep 6 proofs of each kind of data structure
          for c in $(ls -d aws_* | cut -f2 -d_ | uniq) ; do rm -rf $(ls -d aws_${c}_* | tail -n+7) ; done
          popd
          sudo apt-get install --no-install-recommends -yq gnuplot graphviz universal-ctags python3-pip
          curl -L --remote-name \
            https://github.com/awslabs/aws-build-accumulator/releases/download/1.29.0/litani-1.29.0.deb
          sudo dpkg -i litani-1.29.0.deb
          sudo python3 -m pip install cbmc-viewer
      - name: Run benchcomp
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            run
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            collate
      - name: Perf Regression Results Table
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"
      - name: Run other visualizations
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --except dump_markdown_results_table
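
The proof-pruning one-liner in the "Fetch benchmarks" step is terse; the following hedged Python sketch spells out the same policy. It assumes the working directory is verification/cbmc/proofs and that proof directories are named aws_<kind>_... as the shell command expects:

# Hedged sketch of the "keep 6 proofs of each kind" pruning step above:
# group proof directories by the second '_'-separated component of their
# name and delete everything beyond the first six in each group.
import pathlib
import shutil

proofs = pathlib.Path(".")  # assumption: cwd is verification/cbmc/proofs
by_kind = {}
for entry in sorted(p for p in proofs.glob("aws_*") if p.is_dir()):
    kind = entry.name.split("_")[1]
    by_kind.setdefault(kind, []).append(entry)

for kind, dirs in by_kind.items():
    for extra in dirs[6:]:  # mirrors `tail -n+7`: drop all but the first six
        shutil.rmtree(extra)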
