[FEAT] GHA workflow to perform tpch benchmarking #3184
@@ -0,0 +1,29 @@

```yaml
name: Install uv, rust, and python
description: Install uv, rust, and python
inputs:
  python_version:
    description: The version of python to install
    required: false
    default: '3.9'
runs:
  using: composite
  steps:
    - shell: bash
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
        CARGO_BIN="$HOME/.cargo/bin"
        echo 'export PATH="$CARGO_BIN:$PATH"' >> $HOME/.bashrc
        echo "$CARGO_BIN" >> $GITHUB_PATH
    - shell: bash
      run: |
        curl -LsSf https://astral.sh/uv/install.sh | sh
        UV_BIN="$HOME/.local/bin"
        echo 'export PATH="$UV_BIN:$PATH"' >> $HOME/.bashrc
        echo "$UV_BIN" >> $GITHUB_PATH
    - shell: bash
      run: |
        source $HOME/.bashrc
    - shell: bash
      run: |
        uv python install ${{ inputs.python_version }}
        uv python pin ${{ inputs.python_version }}
```
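For context, this composite action is consumed elsewhere in this PR via `uses: ./.github/actions/install` (see build-commit.yaml below). A minimal sketch of a calling workflow; the workflow name, trigger, job name, and `python_version` value here are illustrative, not part of the PR:

```yaml
# Illustrative only: a minimal workflow invoking the composite install action.
name: example
on: workflow_dispatch
jobs:
  example:
    runs-on: ubuntu-latest
    steps:
      # Checkout is required so the local composite action exists on disk
      - uses: actions/checkout@v4
      # Installs rust, uv, and the requested python, and adds them to PATH
      - uses: ./.github/actions/install
        with:
          python_version: '3.11'
```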
@@ -0,0 +1,65 @@

```yaml
cluster_name: performance-comparisons

provider:
  type: aws
  region: us-west-2
  cache_stopped_nodes: true
  security_group:
    GroupName: ray-autoscaler-c1

auth:
  ssh_user: ubuntu
  ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem

max_workers: 2
available_node_types:
  ray.head.default:
    resources: {"CPU": 0}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

  ray.worker.default:
    min_workers: 2
    max_workers: 2
    resources: {}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

setup_commands:
  # Mount drive
  - |
    findmnt /tmp 1> /dev/null
    code=$?
    if [ $code -ne 0 ]; then
      sudo mkfs.ext4 /dev/nvme0n1
      sudo mount -t ext4 /dev/nvme0n1 /tmp
      sudo chmod 777 /tmp
    fi
  # Install dependencies
  - sudo snap install aws-cli --classic
  - curl -LsSf https://astral.sh/uv/install.sh | sh
  - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
  - source ~/.bashrc
  - uv python install <<PYTHON_VERSION>>
  - uv python pin <<PYTHON_VERSION>>
  - uv v
  - echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc
  - source .venv/bin/activate
  - uv pip install pip ray[default] py-spy
  # The GitHub Actions workflow replaces every parameter between `<<...>>`
  # with the actual value determined dynamically while the workflow runs.
  - uv pip install https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/<<SHA>>/<<WHEEL>>
  # Download benchmarking fixtures
  - |
    aws s3 sync \
      s3://eventual-dev-benchmarking-fixtures/uncompressed/tpch-dbgen/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      /tmp/data/<<SCALE_FACTOR>>/<<PARTITION_SIZE>>/parquet/ \
      --quiet
```
**Review comment:** Is this intended behavior? Downloading 1TB of data is going to kill the machine. Are we benchmarking off S3 or local?
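The diff does not show how the `<<...>>` placeholders get rendered (run-tpch.yaml is referenced below but not included in this page). A hedged sketch of what such a substitution step might look like; the template and output file names are assumptions, not from the PR:

```yaml
# Hypothetical step: render the cluster config by substituting the <<...>>
# placeholders, then bring the Ray cluster up. File names are illustrative.
- shell: bash
  run: |
    sed -e "s|<<PYTHON_VERSION>>|${{ inputs.python_version }}|g" \
        -e "s|<<SHA>>|${{ github.sha }}|g" \
        -e "s|<<WHEEL>>|${{ inputs.wheel }}|g" \
        -e "s|<<SCALE_FACTOR>>|${{ inputs.scale_factor }}|g" \
        -e "s|<<PARTITION_SIZE>>|${{ inputs.partition_size }}|g" \
        ray-config.template.yaml > ray.yaml
    ray up ray.yaml -y
```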
@@ -0,0 +1,29 @@

```python
import csv
import sys
from pathlib import Path

file = Path(sys.argv[1])
assert file.exists()

output = Path(sys.argv[2])
assert not output.exists()


def make_md_row(row: list[str]) -> str:
    return f'|{"|".join(row)}|\n'


with open(file) as file:
    with open(output, "w+") as output:
        csv_reader = csv.reader(file)
        header = next(csv_reader)

        header_str = make_md_row(header)
        output.write(header_str)

        separator_str = make_md_row(["-"] * len(header))
        output.write(separator_str)

        for row in csv_reader:
            row_str = make_md_row(row)
            output.write(row_str)
```
**Review comment:** Ok, but note that this was a nice-to-have that really should have been in a follow-on PR.
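Presumably this script turns the benchmark results CSV into a markdown table, e.g. for the job summary. A sketch of how it might be invoked; the script and file names are assumptions:

```yaml
# Hypothetical usage: convert a results CSV into a markdown table and
# surface it in the workflow run summary. All paths are illustrative.
- shell: bash
  run: |
    python csv_to_markdown.py results.csv results.md
    cat results.md >> $GITHUB_STEP_SUMMARY
```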
@@ -0,0 +1,54 @@

```yaml
name: Build commit and run tpch benchmarks

on:
  workflow_dispatch:
    inputs:
      skip_questions:
        type: string
        description: The TPC-H questions to skip
        required: false
        default: ""
      scale_factor:
        type: choice
        options:
          - '2'
          - '10'
          - '100'
          - '1000'
        description: Which scale factor to use
        required: false
        default: '2'
      partition_size:
        type: choice
        options:
          - '2'
          - '32'
```
**Review comment:** I think we should combine SF with partition size, since not all permutations have datasets. Or even simpler, just let people select the URL.

**Reply:** Ya, I was thinking of this as well; a single combined option could work (see the sketch after this file).

**Reply:** Will update.
```yaml
          - '100'
          - '300'
          - '320'
          - '512'
        description: Which partition size to use
        required: false
        default: '2'
      python_version:
        type: string
        description: The version of python to use
        required: false
        default: '3.9'

jobs:
  build:
    uses: ./.github/workflows/build-commit.yaml
    secrets:
      ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}

  run:
    needs: build
    uses: ./.github/workflows/run-tpch.yaml
    with:
      wheel: ${{ needs.build.outputs.wheel }}
      skip_questions: ${{ inputs.skip_questions }}
      scale_factor: ${{ inputs.scale_factor }}
      partition_size: ${{ inputs.partition_size }}
      python_version: ${{ inputs.python_version }}
```
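Following the review thread above, a hedged sketch of the combined input the reviewers float: a single choice whose values pair only the scale factors and partition sizes that actually have datasets. The pairings below are illustrative, not confirmed:

```yaml
# Illustrative only: one choice input coupling scale factor and partition
# size, replacing the separate scale_factor and partition_size inputs so
# that only valid dataset permutations are selectable.
scale_factor_and_partition_size:
  type: choice
  options:
    - 'SF2, 2 partitions'
    - 'SF10, 32 partitions'
    - 'SF100, 100 partitions'
    - 'SF1000, 512 partitions'
  description: Which scale factor and partition size to use
  required: false
  default: 'SF2, 2 partitions'
```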
@@ -0,0 +1,68 @@

```yaml
name: Build a Daft commit and store the outputted wheel in AWS S3

on:
  workflow_dispatch:
  workflow_call:
    secrets:
      ACTIONS_AWS_ROLE_ARN:
        description: The ARN of the AWS role to assume
        required: true
    outputs:
      wheel:
        description: The wheel file that was built
        value: ${{ jobs.build-commit.outputs.wheel }}

jobs:
  build-commit:
    runs-on: buildjet-8vcpu-ubuntu-2004
    timeout-minutes: 15 # Remove for ssh debugging
    permissions:
      id-token: write
      contents: read
    outputs:
      wheel: ${{ steps.build_and_upload.outputs.wheel }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: build-commit-workflow
          role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
      - uses: ./.github/actions/install
      - uses: buildjet/cache@v4
        with:
          path: ~/target
          key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-deps-
      - id: build_and_upload
        run: |
          export CARGO_TARGET_DIR=~/target
          uv v
          source .venv/bin/activate
          uv pip install pip maturin boto3

          if ! ls ~/target/wheels/*.whl 1> /dev/null 2>&1; then
            # Build wheel
            maturin build --release
          fi

          count=$(ls ~/target/wheels/*.whl 2> /dev/null | wc -l)
          if [ "$count" -gt 1 ]; then
            echo "Found more than 1 wheel"
            exit 1
          elif [ "$count" -eq 0 ]; then
            echo "Found no wheel files"
            exit 1
          fi

          # Upload wheel
          for file in ~/target/wheels/*.whl; do
            aws s3 cp $file s3://github-actions-artifacts-bucket/builds/${{ github.sha }}/ --acl public-read --no-progress
            file_basename=$(basename $file)
            echo "wheel=$file_basename" >> $GITHUB_OUTPUT
            echo "Output wheel has been built and stored in S3 at the following location:" >> $GITHUB_STEP_SUMMARY
            echo "https://us-west-2.console.aws.amazon.com/s3/buckets/github-actions-artifacts-bucket?prefix=builds/${{ github.sha }}/" >> $GITHUB_STEP_SUMMARY
          done
          python tools/generate_whl_html_manifest.py
```
**Review comment:** nit: I think `{{SHA}}` and `{{WHEEL}}` are more common here for templating.

**Reply:** Can update.
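If that suggestion is adopted, only the delimiters in the cluster config change; for example, the pip-install line from the diff above would become:

```yaml
- uv pip install https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/{{SHA}}/{{WHEEL}}
```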