Skip to content

Commit

Permalink
Try running a basic comparison benchmark between base and pr commit
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Mar 5, 2024
1 parent ac27428 commit f4a0008
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 5 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
name: Run and Cache Benchmarks

on:
pull_request:
types: [labeled, opened, reopened, synchronize]

jobs:
benchmark:
name: Run Benchmarks
runs-on: ubuntu-latest
steps:
- name: Dump GitHub context
env:
GITHUB_CONTEXT: ${{ toJSON(github) }}
run: echo "$GITHUB_CONTEXT"

- name: Checkout PR changes
uses: actions/checkout@v4

- name: Setup data and generate unique result names
run: |
cd benchmarks
mkdir data
# Setup the TPC-H data set with a scale factor of 10.
# Hack: pipe the output and explicitly pre-prend a carriage
# return to avoid the output staircase effect (e.g. percentage
# updates appearing on subsequent lines).
./bench.sh data tpch | | awk '{printf "\r%s", $0; fflush();}'
# Generate a unique-ish identifier for the results using
# branch name and commit sha
short_ref=$(echo "${{ github.head_ref }}" | cut -c1-20)
short_sha=$(echo "${{ github.sha }}" | cut -c1-7)
echo "HEAD_REF_SHA=$short_ref-$short_sha" >> "$GITHUB_ENV"
short_sha=$(echo "${{ github.pull_request.base.sha }}" | cut -c1-7)
echo "BASE_REF_SHA=${{ github.base_ref }}-$short_sha" >> "$GITHUB_ENV"
- name: Benchmark PR changes
run: |
cd benchmarks
RESULTS_NAME=$HEAD_REF_SHA ./bench.sh run tpch
- name: Checkout base commit
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.base.sha }}
clean: false

- name: Benchmark baseline and compare
run: |
cd benchmarks
RESULTS_NAME=$BASE_REF_SHA ./bench.sh run tpch
pip3 install rich
./bench.sh compare $BASE_REF_SHA $HEAD_REF_SHA
12 changes: 7 additions & 5 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ clickbench_extended: ClickBench "inspired" queries against a single parquet (
DATA_DIR directory to store datasets
CARGO_COMMAND command that runs the benchmark binary
DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
RESULTS_NAME folder where the benchmark files are stored
"
exit 1
}
Expand Down Expand Up @@ -166,18 +167,19 @@ main() {
esac
;;
run)
# Parse positional paraleters
# Parse positional parameters
BENCHMARK=${ARG2:-"${BENCHMARK}"}
BRANCH_NAME=$(cd ${DATAFUSION_DIR} && git rev-parse --abbrev-ref HEAD)
BRANCH_NAME=${BRANCH_NAME//\//_} # mind blowing syntax to replace / with _
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$BRANCH_NAME"}
RESULTS_NAME=${RESULTS_NAME:-"${BRANCH_NAME}"}
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$RESULTS_NAME"}

echo "***************************"
echo "DataFusion Benchmark Script"
echo "COMMAND: ${COMMAND}"
echo "BENCHMARK: ${BENCHMARK}"
echo "DATAFUSION_DIR: ${DATAFUSION_DIR}"
echo "BRACH_NAME: ${BRANCH_NAME}"
echo "BRANCH_NAME: ${BRANCH_NAME}"
echo "DATA_DIR: ${DATA_DIR}"
echo "RESULTS_DIR: ${RESULTS_DIR}"
echo "CARGO_COMMAND: ${CARGO_COMMAND}"
Expand Down Expand Up @@ -278,7 +280,7 @@ data_tpch() {
echo " tbl files exist ($FILE exists)."
else
echo " creating tbl files with tpch_dbgen..."
docker run -v "${TPCH_DIR}":/data -it --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
docker run -v "${TPCH_DIR}":/data --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
fi

# Copy expected answers into the ./data/answers directory if it does not already exist
Expand All @@ -288,7 +290,7 @@ data_tpch() {
else
echo " Copying answers to ${TPCH_DIR}/answers"
mkdir -p "${TPCH_DIR}/answers"
docker run -v "${TPCH_DIR}":/data -it --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
docker run -v "${TPCH_DIR}":/data --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
fi

# Create 'parquet' files from tbl
Expand Down
3 changes: 3 additions & 0 deletions datafusion/physical-plan/src/sorts/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,9 @@ impl ExecutionPlan for SortExec {
let batch = batch?;
sorter.insert_batch(batch).await?;
}
// Test whether benchmarks catch this
// TODO: remove before merge!
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
sorter.sort()
})
.try_flatten(),
Expand Down

0 comments on commit f4a0008

Please sign in to comment.