Skip to content

Commit

Permalink
Add workflows to build a commit and run tpch benchmarks
Browse files Browse the repository at this point in the history
- slightly different than existing one
  • Loading branch information
Raunak Bhagat committed Nov 22, 2024
1 parent ec39dc0 commit 3bc0a42
Show file tree
Hide file tree
Showing 8 changed files with 423 additions and 3 deletions.
29 changes: 29 additions & 0 deletions .github/actions/install/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Composite action that installs the Rust toolchain (via rustup), uv, and a
# uv-managed Python interpreter on the runner.
name: Install uv, rust, and python
description: Install uv, rust, and python
inputs:
  python_version:
    description: The version of python to install
    required: false
    default: '3.9'
runs:
  using: composite
  steps:
    # Install Rust. Appending to $GITHUB_PATH is what makes cargo visible to
    # all *subsequent* steps; every step runs in a fresh shell, so sourcing
    # ~/.bashrc inside a step would have no lasting effect.
    - shell: bash
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
        CARGO_BIN="$HOME/.cargo/bin"
        # Expand $CARGO_BIN now (it will not exist in later shells) but keep
        # $PATH literal so it is resolved when ~/.bashrc is sourced.
        echo "export PATH=\"$CARGO_BIN:\$PATH\"" >> "$HOME/.bashrc"
        echo "$CARGO_BIN" >> "$GITHUB_PATH"
    # Install uv and expose it the same way.
    - shell: bash
      run: |
        curl -LsSf https://astral.sh/uv/install.sh | sh
        UV_BIN="$HOME/.local/bin"
        echo "export PATH=\"$UV_BIN:\$PATH\"" >> "$HOME/.bashrc"
        echo "$UV_BIN" >> "$GITHUB_PATH"
    # Install and pin the requested Python version. The input is passed via
    # the environment rather than spliced into the script text, so an
    # unexpected value cannot be interpreted as shell syntax.
    - shell: bash
      env:
        PYTHON_VERSION: ${{ inputs.python_version }}
      run: |
        uv python install "$PYTHON_VERSION"
        uv python pin "$PYTHON_VERSION"
65 changes: 65 additions & 0 deletions .github/assets/benchmarking_ray_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Ray cluster-launcher configuration for the TPC-H benchmarking cluster.
#
# This file is a template: the GitHub Actions workflow replaces every
# `<<...>>` placeholder (PYTHON_VERSION, SHA, WHEEL, SCALE_FACTOR,
# PARTITION_SIZE) with concrete values before the cluster is launched.
cluster_name: performance-comparisons

provider:
  type: aws
  region: us-west-2
  cache_stopped_nodes: true
  security_group:
    GroupName: ray-autoscaler-c1

auth:
  ssh_user: ubuntu
  ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem

max_workers: 2
available_node_types:
  ray.head.default:
    # Head node advertises zero CPUs — presumably so that benchmark tasks are
    # scheduled only on the worker nodes.
    resources: {"CPU": 0}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

  ray.worker.default:
    # Fixed-size worker pool: min and max are both 2.
    min_workers: 2
    max_workers: 2
    resources: {}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

setup_commands:
  # Mount drive: if nothing is mounted at /tmp yet, format the NVMe device
  # and mount it there.
  - |
    findmnt /tmp 1> /dev/null
    code=$?
    if [ $code -ne 0 ]; then
      sudo mkfs.ext4 /dev/nvme0n1
      sudo mount -t ext4 /dev/nvme0n1 /tmp
      sudo chmod 777 /tmp
    fi
  # Install dependencies
  - sudo snap install aws-cli --classic
  - curl -LsSf https://astral.sh/uv/install.sh | sh
  - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
  - source ~/.bashrc
  - uv python install <<PYTHON_VERSION>>
  - uv python pin <<PYTHON_VERSION>>
  - uv v
  - echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc
  - source .venv/bin/activate
  # The extras specifier is quoted so the shell cannot treat `[default]` as a
  # glob character class.
  - uv pip install pip 'ray[default]' py-spy
  # GitHub Actions workflow will replace all parameters between `<<...>>` with the
  # actual values as determined dynamically during runtime of the actual workflow.
  - uv pip install https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/<<SHA>>/<<WHEEL>>
  # Download benchmarking fixtures
  - |
    aws s3 sync \
      s3://eventual-dev-benchmarking-fixtures/uncompressed/tpch-dbgen/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      /tmp/data/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      --quiet
29 changes: 29 additions & 0 deletions .github/scripts/csv_to_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Convert a CSV file into a Markdown table.

Usage: python csv_to_md.py <input.csv> <output.md>

The first CSV row becomes the table header. The script refuses to overwrite
an existing output file and exits with a non-zero status on any error.
"""

import csv
import sys
from pathlib import Path


def make_md_row(row: list[str]) -> str:
    """Render one list of cell values as a single Markdown table row.

    Pipe characters inside a cell are backslash-escaped and embedded line
    breaks are replaced with ``<br>`` so that a cell can never split the row
    or add extra columns.
    """
    cells = [
        cell.replace("|", "\\|").replace("\r\n", "<br>").replace("\n", "<br>")
        for cell in row
    ]
    return f'|{"|".join(cells)}|\n'


def convert(source: Path, destination: Path) -> None:
    """Read the CSV at ``source`` and write a Markdown table to ``destination``.

    Raises:
        ValueError: if the CSV has no rows at all (there is no header to emit).
            The destination file is not created in that case.
    """
    # newline="" is what the csv module requires so that quoted fields
    # containing line breaks are parsed correctly.
    with open(source, newline="", encoding="utf-8") as src:
        reader = csv.reader(src)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{source} is empty; there is no header row to convert")

        with open(destination, "w", encoding="utf-8") as dst:
            dst.write(make_md_row(header))
            # Delimiter row: one "-" cell per header column.
            dst.write(make_md_row(["-"] * len(header)))
            for row in reader:
                dst.write(make_md_row(row))


def main(argv: list[str]) -> int:
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        print(f"usage: {argv[0] if argv else 'csv_to_md.py'} <input.csv> <output.md>", file=sys.stderr)
        return 2

    source = Path(argv[1])
    destination = Path(argv[2])

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if not source.exists():
        print(f"error: input file {source} does not exist", file=sys.stderr)
        return 1
    if destination.exists():
        print(f"error: output file {destination} already exists", file=sys.stderr)
        return 1

    try:
        convert(source, destination)
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
54 changes: 54 additions & 0 deletions .github/workflows/build-commit-run-tpch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Manually-triggered workflow that builds a wheel for the current commit and
# then runs the TPC-H benchmarks against it, by chaining the reusable
# build-commit and run-tpch workflows.
name: Build commit and run tpch benchmarks

on:
  workflow_dispatch:
    inputs:
      skip_questions:
        type: string
        description: The TPC-H questions to skip
        required: false
        default: ""
      scale_factor:
        type: choice
        options:
          - '2'
          - '10'
          - '100'
          - '1000'
        description: Which scale factor to use
        required: false
        default: '2'
      partition_size:
        type: choice
        # Each option is listed once (a duplicated '32' entry was removed).
        options:
          - '2'
          - '32'
          - '100'
          - '300'
          - '320'
          - '512'
        description: Which partition size to use
        required: false
        default: '2'
      python_version:
        type: string
        description: The version of python to use
        required: false
        default: '3.9'

jobs:
  build:
    uses: ./.github/workflows/build-commit.yaml
    secrets:
      ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}

  run:
    # Runs only after the wheel has been built; the wheel file name is taken
    # from the build job's output.
    needs: build
    uses: ./.github/workflows/run-tpch.yaml
    with:
      wheel: ${{ needs.build.outputs.wheel }}
      skip_questions: ${{ inputs.skip_questions }}
      scale_factor: ${{ inputs.scale_factor }}
      partition_size: ${{ inputs.partition_size }}
      python_version: ${{ inputs.python_version }}
68 changes: 68 additions & 0 deletions .github/workflows/build-commit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Builds a release wheel for the checked-out commit and uploads it to
# s3://github-actions-artifacts-bucket/builds/<sha>/. Can be run manually or
# called from another workflow, in which case the wheel's file name is exposed
# as the `wheel` output.
name: Build a Daft commit and store the outputted wheel in AWS S3

on:
  workflow_dispatch:
  workflow_call:
    secrets:
      ACTIONS_AWS_ROLE_ARN:
        description: The ARN of the AWS role to assume
        required: true
    outputs:
      wheel:
        description: The wheel file that was built
        value: ${{ jobs.build-commit.outputs.wheel }}

jobs:
  build-commit:
    runs-on: buildjet-8vcpu-ubuntu-2004
    timeout-minutes: 15 # Remove for ssh debugging
    permissions:
      id-token: write
      contents: read
    outputs:
      wheel: ${{ steps.build_and_upload.outputs.wheel }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: build-commit-workflow
          role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
      - uses: ./.github/actions/install
      # Cache the cargo target directory, keyed on the Cargo.lock contents.
      - uses: buildjet/cache@v4
        with:
          path: ~/target
          key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-deps-
      - id: build_and_upload
        run: |
          export CARGO_TARGET_DIR=~/target
          uv venv
          source .venv/bin/activate
          uv pip install pip maturin boto3

          # The cache key depends only on Cargo.lock, so a restored
          # ~/target/wheels may hold a wheel built from a *different* commit.
          # Discard any such wheel and always rebuild; the cached compilation
          # artifacts are still reused by cargo, so the rebuild is incremental.
          rm -f ~/target/wheels/*.whl
          maturin build --release

          # Expect exactly one wheel. nullglob makes the array empty (rather
          # than holding the literal pattern) when nothing matches.
          shopt -s nullglob
          wheels=(~/target/wheels/*.whl)
          shopt -u nullglob
          count=${#wheels[@]}
          if [ "$count" -gt 1 ]; then
            echo "Found more than 1 wheel"
            exit 1
          elif [ "$count" -eq 0 ]; then
            echo "Found no wheel files"
            exit 1
          fi

          # Upload wheel
          file="${wheels[0]}"
          aws s3 cp "$file" s3://github-actions-artifacts-bucket/builds/${{ github.sha }}/ --acl public-read --no-progress
          file_basename=$(basename "$file")
          echo "wheel=$file_basename" >> "$GITHUB_OUTPUT"
          echo "Output wheel has been built and stored in S3 at the following location:" >> "$GITHUB_STEP_SUMMARY"
          echo "https://us-west-2.console.aws.amazon.com/s3/buckets/github-actions-artifacts-bucket?prefix=builds/${{ github.sha }}/" >> "$GITHUB_STEP_SUMMARY"

          python tools/generate_whl_html_manifest.py
Loading

0 comments on commit 3bc0a42

Please sign in to comment.