Skip to content

Commit

Permalink
Add workflows to build a commit and run tpch benchmarks
Browse files Browse the repository at this point in the history
- slightly different than existing one
  • Loading branch information
Raunak Bhagat committed Nov 19, 2024
1 parent 274f300 commit a71c566
Show file tree
Hide file tree
Showing 10 changed files with 402 additions and 3 deletions.
29 changes: 29 additions & 0 deletions .github/actions/install/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Composite action that installs the Rust toolchain (via rustup), the uv
# package manager, and a uv-managed Python interpreter on the runner.
name: Install uv, rust, and python
description: Install uv, rust, and python
inputs:
  python_version:
    description: The version of python to install
    required: false
    # Kept quoted so YAML loads it as a string (an unquoted 3.10 would be 3.1).
    default: '3.9'
runs:
  using: composite
  steps:
    - name: Install rust
      shell: bash
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
        CARGO_BIN="$HOME/.cargo/bin"
        # Expand CARGO_BIN now but keep $PATH literal: the line written to
        # .bashrc must not depend on a variable that only exists in this step.
        echo "export PATH=\"$CARGO_BIN:\$PATH\"" >> "$HOME/.bashrc"
        # GITHUB_PATH is what exposes the directory to all subsequent steps.
        echo "$CARGO_BIN" >> "$GITHUB_PATH"
    - name: Install uv
      shell: bash
      run: |
        curl -LsSf https://astral.sh/uv/install.sh | sh
        UV_BIN="$HOME/.local/bin"
        # Same quoting as above: UV_BIN is expanded here, $PATH at source time.
        echo "export PATH=\"$UV_BIN:\$PATH\"" >> "$HOME/.bashrc"
        echo "$UV_BIN" >> "$GITHUB_PATH"
    # No separate `source ~/.bashrc` step: every step runs in its own shell
    # process, so sourcing in one step cannot affect the next. The PATH
    # additions reach later steps through GITHUB_PATH instead.
    - name: Install and pin python
      shell: bash
      env:
        # Passed through the environment rather than interpolated into the
        # script text, so the value is always treated as data by the shell.
        PYTHON_VERSION: ${{ inputs.python_version }}
      run: |
        uv python install "$PYTHON_VERSION"
        uv python pin "$PYTHON_VERSION"
60 changes: 60 additions & 0 deletions .github/assets/ray.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Ray cluster launcher configuration used by the benchmarking workflows.
# Tokens of the form `<<NAME>>` are placeholders; the calling workflow rewrites
# them (with sed) before running `ray up` on this file.
cluster_name: performance-comparisons

provider:
  type: aws
  region: us-west-2
  cache_stopped_nodes: true
  security_group:
    GroupName: ray-autoscaler-c1

auth:
  ssh_user: ubuntu
  ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem

max_workers: 2
available_node_types:
  ray.head.default:
    # Advertise zero CPUs on the head node.
    resources: {"CPU": 0}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

  ray.worker.default:
    min_workers: 2
    max_workers: 2
    resources: {}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

setup_commands:
  # Mount drive. Skipped when /tmp is already a mount point, because
  # mkfs.ext4 refuses to format a mounted device and would otherwise make a
  # repeated setup run fail on a node that is still up.
  - |
    if ! mountpoint -q /tmp; then
      sudo mkfs.ext4 /dev/nvme0n1 && sudo mount -t ext4 /dev/nvme0n1 /tmp
    fi
  - sudo chmod 777 /tmp
  # Install dependencies
  - sudo snap install aws-cli --classic
  - curl -LsSf https://astral.sh/uv/install.sh | sh
  - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
  - source ~/.bashrc
  - uv python install 3.9
  - uv python pin 3.9
  # `uv venv` is the full name of the `uv v` alias; it creates ./.venv.
  - uv venv
  - echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc
  - source .venv/bin/activate
  # Quoted so the shell never treats the brackets as a glob pattern.
  - uv pip install pip 'ray[default]' py-spy
  # GitHub Actions workflow will replace all parameters between `<<...>>` with the
  # actual values as determined dynamically during runtime of the actual workflow.
  - uv pip install https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/<<SHA>>/<<WHEEL>>
  # Download benchmarking fixtures
  - |
    aws s3 sync \
      s3://eventual-dev-benchmarking-fixtures/uncompressed/tpch-dbgen/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      /tmp/data/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      --quiet
7 changes: 7 additions & 0 deletions .github/scripts/build-commit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Dispatch the `build-commit.yaml` workflow against the currently checked-out
# branch using the GitHub CLI.
set -euo pipefail

BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)

# In detached-HEAD state rev-parse prints the literal string "HEAD", which is
# not a ref the workflow can be dispatched on.
if [ "$BRANCH_NAME" = "HEAD" ]; then
  echo "Not on a branch (detached HEAD); check out a branch first." >&2
  exit 1
fi

# build-commit.yaml declares no workflow_dispatch inputs, so no `-f` fields are
# sent: GitHub rejects a dispatch that carries undeclared inputs. The workflow
# builds whatever commit `--ref` points at, on the runner it hard-codes.
gh workflow run build-commit.yaml --ref "$BRANCH_NAME"
29 changes: 29 additions & 0 deletions .github/scripts/csv_to_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import csv
import sys
from pathlib import Path

def make_md_row(row: list[str]) -> str:
    """Render one CSV record as a single Markdown table row.

    Literal ``|`` characters are backslash-escaped and embedded line breaks are
    replaced by spaces, so a cell can never split the row or add a column.
    The returned string ends with a newline.
    """
    cells = [
        cell.replace("|", "\\|").replace("\r", " ").replace("\n", " ")
        for cell in row
    ]
    return f'|{"|".join(cells)}|\n'


def csv_to_md(src: Path, dst: Path) -> None:
    """Convert the CSV file at ``src`` into a Markdown table written to ``dst``.

    The first CSV record becomes the table header.

    Raises:
        ValueError: if ``src`` contains no records. ``dst`` is not created in
            that case.
    """
    # newline="" lets the csv module handle quoted fields that contain newlines.
    with open(src, newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        header = next(csv_reader, None)
        if header is None:
            raise ValueError(f"{src} is empty; expected a header row")

        with open(dst, "w") as md_file:
            md_file.write(make_md_row(header))
            # Three hyphens per column is the conventional delimiter row.
            md_file.write(make_md_row(["---"] * len(header)))
            for row in csv_reader:
                md_file.write(make_md_row(row))


def main(argv: list[str]) -> int:
    """Command-line entry point: ``csv_to_md.py <input.csv> <output.md>``.

    Returns the process exit status: 0 on success, 2 on a usage error, and 1
    when the input is missing or empty or the output already exists.
    """
    if len(argv) != 3:
        print(f"usage: {argv[0]} <input.csv> <output.md>", file=sys.stderr)
        return 2

    file = Path(argv[1])
    output = Path(argv[2])

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if not file.exists():
        print(f"input file does not exist: {file}", file=sys.stderr)
        return 1
    if output.exists():
        print(f"refusing to overwrite existing file: {output}", file=sys.stderr)
        return 1

    try:
        csv_to_md(file, output)
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
2 changes: 2 additions & 0 deletions .github/scripts/performance-comparisons.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Dispatch the `performance-comparisons.yaml` workflow against the currently
# checked-out branch using the GitHub CLI.
set -euo pipefail

BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)

# In detached-HEAD state rev-parse prints the literal string "HEAD", which is
# not a ref the workflow can be dispatched on.
if [ "$BRANCH_NAME" = "HEAD" ]; then
  echo "Not on a branch (detached HEAD); check out a branch first." >&2
  exit 1
fi

# performance-comparisons.yaml declares no workflow_dispatch inputs, so no `-f`
# fields are sent: GitHub rejects a dispatch that carries undeclared inputs.
gh workflow run performance-comparisons.yaml --ref "$BRANCH_NAME"
36 changes: 36 additions & 0 deletions .github/workflows/build-commit-run-tpch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Manually dispatched pipeline: build a wheel for the current commit with the
# reusable build workflow, then hand that wheel to the reusable TPC-H workflow.
name: Build commit and run tpch benchmarks

on:
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is not referenced by any job below; the `run`
      # job always uses the wheel produced by the `build` job. It is kept so
      # existing dispatch calls that pass it keep working.
      wheel:
        description: The wheel artifact to use
        required: false
        default: getdaft-0.3.0.dev0-cp38-abi3-manylinux_2_31_x86_64.whl
      skip_questions:
        description: The TPC-H questions to skip
        required: false
        default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22"
      # Inputs without an explicit `type` are strings, so the numeric-looking
      # defaults are quoted to say so explicitly.
      scale_factor:
        description: Which scale factor to use
        required: false
        default: '2'
      partition_size:
        description: Which partition size to use
        required: false
        default: '2'

jobs:
  build:
    # A called workflow cannot hold more GITHUB_TOKEN permissions than the job
    # that calls it, and the build job in build-commit.yaml requests exactly
    # these two. NOTE(review): confirm whether the repository defaults already
    # grant `id-token: write`; granting it here is harmless either way.
    permissions:
      id-token: write
      contents: read
    uses: ./.github/workflows/build-commit.yaml
    secrets:
      ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}

  run:
    needs: build
    uses: ./.github/workflows/run-tpch.yaml
    with:
      wheel: ${{ needs.build.outputs.wheel }}
      skip_questions: ${{ inputs.skip_questions }}
      scale_factor: ${{ inputs.scale_factor }}
      partition_size: ${{ inputs.partition_size }}
66 changes: 66 additions & 0 deletions .github/workflows/build-commit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Builds a release wheel for the checked-out commit with maturin and uploads it
# to S3 under builds/<commit sha>/. Can be dispatched manually or called from
# another workflow, which receives the wheel's file name as the `wheel` output.
name: Build a Daft commit and store the outputted wheel in AWS S3

on:
  workflow_dispatch:
  workflow_call:
    secrets:
      ACTIONS_AWS_ROLE_ARN:
        description: The ARN of the AWS role to assume
        required: true
    outputs:
      wheel:
        description: The wheel file that was built
        value: ${{ jobs.build-commit.outputs.wheel }}

jobs:
  build-commit:
    runs-on: buildjet-8vcpu-ubuntu-2004
    timeout-minutes: 15  # Remove for ssh debugging
    permissions:
      # id-token lets configure-aws-credentials request an OIDC token in order
      # to assume the role below.
      id-token: write
      contents: read
    outputs:
      wheel: ${{ steps.build_and_upload.outputs.wheel }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: build-commit-workflow
          role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
      - uses: ./.github/actions/install
      # Cache the cargo target directory, keyed on the lock file.
      - uses: buildjet/cache@v4
        with:
          path: ~/target
          key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-deps-
      - id: build_and_upload
        run: |
          export CARGO_TARGET_DIR=~/target

          # The restored cache is keyed only on Cargo.lock, so it can hold a
          # wheel built from a different commit. Drop any cached wheel and
          # always rebuild, so the wheel uploaded under this SHA is built from
          # this SHA. The rest of the cached target directory is left in place
          # for cargo to reuse.
          rm -rf "$CARGO_TARGET_DIR/wheels"

          # Build wheel
          uv venv
          source .venv/bin/activate
          uv pip install pip maturin
          maturin build --release

          # Exactly one wheel is expected; nullglob makes "no match" an empty
          # array instead of the unexpanded pattern.
          shopt -s nullglob
          wheels=("$CARGO_TARGET_DIR"/wheels/*.whl)
          count=${#wheels[@]}
          if [ "$count" -gt 1 ]; then
            echo "Found more than 1 wheel"
            exit 1
          elif [ "$count" -eq 0 ]; then
            echo "Found no wheel files"
            exit 1
          fi

          # Upload wheel
          file="${wheels[0]}"
          # NOTE(review): `--acl public-read` makes the uploaded wheel publicly
          # readable; the benchmark setup installs it over plain HTTPS.
          aws s3 cp "$file" "s3://github-actions-artifacts-bucket/builds/${{ github.sha }}/" --acl public-read --no-progress
          file_basename=$(basename "$file")
          echo "wheel=$file_basename" >> "$GITHUB_OUTPUT"
          echo "Output wheel has been built and stored in S3 at the following location:" >> "$GITHUB_STEP_SUMMARY"
          echo "https://us-west-2.console.aws.amazon.com/s3/buckets/github-actions-artifacts-bucket?prefix=builds/${{ github.sha }}/" >> "$GITHUB_STEP_SUMMARY"
61 changes: 61 additions & 0 deletions .github/workflows/performance-comparisons.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Manually dispatched pipeline: build a wheel for the current commit, start a
# Ray cluster from .github/assets/ray.yaml, run the TPC-H benchmark against it
# and upload the resulting CSV as a workflow artifact.
name: Run performance comparisons

on:
  workflow_dispatch:

jobs:
  build:
    # A called workflow cannot hold more GITHUB_TOKEN permissions than the job
    # that calls it, and the build job in build-commit.yaml requests exactly
    # these two. NOTE(review): confirm whether the repository defaults already
    # grant `id-token: write`; granting it here is harmless either way.
    permissions:
      id-token: write
      contents: read
    uses: ./.github/workflows/build-commit.yaml
    secrets:
      ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}

  run:
    needs: build
    runs-on: [self-hosted, linux, x64, ci-dev]
    timeout-minutes: 15  # Remove for ssh debugging
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      # NOTE(review): no `role-to-assume` is given here, unlike in
      # build-commit.yaml; presumably the self-hosted runner already has AWS
      # credentials available. Confirm.
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: run-tpch-workflow
      - uses: ./.github/actions/install
      - name: Launch ray cluster and run benchmark
        env:
          # One definition for the values used both in the ray.yaml
          # placeholders and in the benchmark arguments, so they cannot drift.
          # NOTE(review): assumes `--num_parts` corresponds to the partition
          # size placeholder and that the fixtures bucket uses these bare
          # values as path components. Verify against the bucket layout.
          SCALE_FACTOR: '2'
          PARTITION_SIZE: '2'
        run: |
          # Dynamically update ray config file. Every `<<...>>` placeholder has
          # to be replaced: a leftover `<<NAME>>` would reach the shell on the
          # cluster nodes, where `<<` and `>>` are redirection operators.
          sed -i 's|<<SHA>>|${{ github.sha }}|g' .github/assets/ray.yaml
          sed -i 's|<<WHEEL>>|${{ needs.build.outputs.wheel }}|g' .github/assets/ray.yaml
          sed -i "s|<<SCALE_FACTOR>>|${SCALE_FACTOR}|g" .github/assets/ray.yaml
          sed -i "s|<<PARTITION_SIZE>>|${PARTITION_SIZE}|g" .github/assets/ray.yaml
          # Download private ssh key
          mkdir -p ~/.ssh
          KEY=$(aws secretsmanager get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text)
          # Overwrite rather than append: if the file survives from an earlier
          # run on this self-hosted runner, appending would stack a second copy
          # of the key onto it. The umask keeps a newly created file private
          # from the start.
          (umask 077 && echo "$KEY" > ~/.ssh/ci-github-actions-ray-cluster-key.pem)
          chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem
          # Install dependencies
          uv venv
          source .venv/bin/activate
          # Remove the checked-out `daft` directory before installing the
          # wheel. NOTE(review): presumably so `import daft` resolves to the
          # installed wheel rather than the source tree; confirm.
          rm -rf daft
          uv pip install 'ray[default]' boto3 https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/${{ github.sha }}/${{ needs.build.outputs.wheel }}
          # Boot up ray cluster
          ray up .github/assets/ray.yaml -y
          HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1)
          # Forward the head node's port 8265 to localhost in the background.
          ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem "ubuntu@$HEAD_NODE_IP"
          DAFT_RUNNER=ray python -m benchmarking.tpch \
            --skip_questions="2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" \
            --num_parts "$PARTITION_SIZE" \
            --scale_factor "$SCALE_FACTOR" \
            --parquet_file_cache /tmp/data \
            --output_csv output.csv \
            --ray_job_dashboard_url http://localhost:8265 \
            --skip_warmup \
            --no_pymodules
      # Separate step with `if: always()` so the cluster is also shut down when
      # the step above fails; as the last line of that script it would be
      # skipped on any earlier error and leave the instances running.
      - name: Tear down ray cluster
        if: always()
        run: |
          if [ -f .venv/bin/activate ]; then
            source .venv/bin/activate
            ray down .github/assets/ray.yaml -y
          fi
      - uses: actions/upload-artifact@v4
        with:
          name: output.csv
          path: output.csv
Loading

0 comments on commit a71c566

Please sign in to comment.