Skip to content

Commit

Permalink
Test CI benches
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Mar 11, 2024
2 parents 88187d4 + d01f559 commit b00672a
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 5 deletions.
86 changes: 86 additions & 0 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Runs the TPC-H benchmarks for a pull request and for the base commit when a
# PR comment contains `/benchmark`, then uploads two artifacts: `message` (the
# Markdown comparison) and `pr` (the pull request number).
name: Benchmarks

on:
  issue_comment:
    # Only new comments may start a run; without this filter, editing or
    # deleting a comment would start one as well.
    types: [created]

# This workflow builds and runs code taken from the pull request, so the
# token it receives is limited to read-only repository access.
permissions:
  contents: read

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    # `issue_comment` fires for plain issues too; require a PR and the command.
    # NOTE(review): any commenter can start a run; consider also gating on
    # `github.event.comment.author_association`.
    if: github.event.issue.pull_request && contains(github.event.comment.body, '/benchmark')
    steps:
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJSON(github) }}
        run: echo "$GITHUB_CONTEXT"

      - name: Checkout PR changes
        uses: actions/checkout@v4
        with:
          ref: refs/pull/${{ github.event.issue.number }}/head
          # Do not leave the token in .git/config while PR code is running.
          persist-credentials: false

      - name: Setup data and generate unique result names
        env:
          PR_NUMBER: ${{ github.event.issue.number }}
        run: |
          cd benchmarks
          mkdir -p data
          # Set up the TPC-H data set (the scale factor is whatever bench.sh
          # uses for the `tpch` target)
          ./bench.sh data tpch
          # Generate unique-ish identifiers for the results
          echo "HEAD_REF_SHA=pr-${PR_NUMBER}" >> "$GITHUB_ENV"
          short_sha=$(echo "$GITHUB_SHA" | cut -c1-7)
          echo "BASE_REF_SHA=main-$short_sha" >> "$GITHUB_ENV"

      - name: Benchmark PR changes
        env:
          RESULTS_NAME: ${{ env.HEAD_REF_SHA }}
        run: |
          cd benchmarks
          ./bench.sh run tpch

      - name: Checkout base commit
        uses: actions/checkout@v4
        with:
          ref: ${{ github.sha }}
          # Keep untracked files (generated data and the PR results) in place.
          clean: false
          persist-credentials: false

      - name: Benchmark baseline and generate comparison message
        env:
          RESULTS_NAME: ${{ env.BASE_REF_SHA }}
          PR_NUMBER: ${{ github.event.issue.number }}
        run: |
          cd benchmarks
          ./bench.sh run tpch
          echo "$PR_NUMBER" > pr
          pip3 install rich
          # Unquoted heredoc: variables and $(...) are expanded, which is why
          # the backticks of the code fence are escaped. The blank lines
          # around the fence are needed for Markdown to render it in <details>.
          cat > message.md <<EOF
          # Benchmark results
          <details>
          <summary>Benchmarks comparing ${GITHUB_SHA} and PR ${PR_NUMBER}</summary>

          \`\`\`
          $(./bench.sh compare "$BASE_REF_SHA" "$HEAD_REF_SHA")
          \`\`\`

          </details>
          EOF
          cat message.md

      - name: Upload benchmark comparison message
        uses: actions/upload-artifact@v4
        with:
          name: message
          path: benchmarks/message.md

      - name: Upload PR number
        uses: actions/upload-artifact@v4
        with:
          name: pr
          path: benchmarks/pr
51 changes: 51 additions & 0 deletions .github/workflows/pr_comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Posts the benchmark comparison produced by the "Benchmarks" workflow as a
# comment on the pull request the benchmarks were run for.
name: PR Comment

on:
  workflow_run:
    workflows: ["Benchmarks"]
    types:
      - completed

# Least privilege: read the artifacts of the triggering run and write the
# pull request comment; nothing else.
permissions:
  actions: read
  pull-requests: write

jobs:
  comment:
    name: PR Comment
    runs-on: ubuntu-latest
    if: github.event.workflow_run.conclusion == 'success'
    steps:
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJSON(github) }}
        run: echo "$GITHUB_CONTEXT"

      - name: Download comment message
        uses: actions/download-artifact@v4
        with:
          name: message
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Download pr number
        uses: actions/download-artifact@v4
        with:
          name: pr
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Print message and pr number
        run: |
          # The artifacts come from a run that executed pull-request code, so
          # the PR number must be purely numeric before it is exported: a
          # multi-line value would inject extra variables into GITHUB_ENV.
          pr_number="$(cat pr)"
          if ! [[ "$pr_number" =~ ^[0-9]+$ ]]; then
            echo "::error::Unexpected content in the 'pr' artifact"
            exit 1
          fi
          echo "$pr_number"
          echo "PR_NUMBER=$pr_number" >> "$GITHUB_ENV"
          cat message.md

      - name: Post the comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const content = fs.readFileSync('message.md', 'utf8');
            // Await the request so that a failed API call fails this step.
            await github.rest.issues.createComment({
              issue_number: Number(process.env.PR_NUMBER),
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: content,
            });
12 changes: 7 additions & 5 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ clickbench_extended: ClickBench "inspired" queries against a single parquet (
DATA_DIR directory to store datasets
CARGO_COMMAND command that runs the benchmark binary
DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
RESULTS_NAME folder where the benchmark files are stored
"
exit 1
}
Expand Down Expand Up @@ -166,18 +167,19 @@ main() {
esac
;;
run)
# Parse positional paraleters
# Parse positional parameters
BENCHMARK=${ARG2:-"${BENCHMARK}"}
BRANCH_NAME=$(cd ${DATAFUSION_DIR} && git rev-parse --abbrev-ref HEAD)
BRANCH_NAME=${BRANCH_NAME//\//_} # mind blowing syntax to replace / with _
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$BRANCH_NAME"}
RESULTS_NAME=${RESULTS_NAME:-"${BRANCH_NAME}"}
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$RESULTS_NAME"}

echo "***************************"
echo "DataFusion Benchmark Script"
echo "COMMAND: ${COMMAND}"
echo "BENCHMARK: ${BENCHMARK}"
echo "DATAFUSION_DIR: ${DATAFUSION_DIR}"
echo "BRACH_NAME: ${BRANCH_NAME}"
echo "BRANCH_NAME: ${BRANCH_NAME}"
echo "DATA_DIR: ${DATA_DIR}"
echo "RESULTS_DIR: ${RESULTS_DIR}"
echo "CARGO_COMMAND: ${CARGO_COMMAND}"
Expand Down Expand Up @@ -278,7 +280,7 @@ data_tpch() {
echo " tbl files exist ($FILE exists)."
else
echo " creating tbl files with tpch_dbgen..."
docker run -v "${TPCH_DIR}":/data -it --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
docker run -v "${TPCH_DIR}":/data --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
fi

# Copy expected answers into the ./data/answers directory if it does not already exist
Expand All @@ -288,7 +290,7 @@ data_tpch() {
else
echo " Copying answers to ${TPCH_DIR}/answers"
mkdir -p "${TPCH_DIR}/answers"
docker run -v "${TPCH_DIR}":/data -it --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
docker run -v "${TPCH_DIR}":/data --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
fi

# Create 'parquet' files from tbl
Expand Down

0 comments on commit b00672a

Please sign in to comment.