-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] Add steps to spin up, submit job, and spin down ray clusters (#3403)
# Overview - New steps that: - spin up - submit job - spin down ray clusters ## Note If any of the previous steps fails, the "tear-down" step (responsible for tearing down the ray cluster) will still always run. (The only way this tear-down step would not run is if the workflow is *manually* cancelled.)
- Loading branch information
Raunak Bhagat
authored
Nov 23, 2024
1 parent
31a7abc
commit 5dce4fb
Showing
4 changed files
with
132 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Ray autoscaler cluster template used by the `run-cluster` GitHub Actions
# workflow. Every `{{...}}` placeholder below is substituted with `sed` by that
# workflow before `ray up` is invoked, so this file is not usable as-is.
cluster_name: '{{RAY_CLUSTER_NAME}}'

provider:
  type: aws
  region: us-west-2
  # The workflow generates a unique cluster name for every run, so a stopped
  # node can never be picked up again by a later run. Terminate nodes on
  # `ray down` instead of stopping them; otherwise stopped instances pile up.
  cache_stopped_nodes: false
  security_group:
    GroupName: ray-autoscaler-c1

auth:
  ssh_user: ubuntu
  # Written by the workflow's "Download private ssh key" step.
  ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem

# Cluster-wide cap on worker nodes (the head node is not counted).
max_workers: 2
available_node_types:
  ray.head.default:
    # Advertise zero CPUs on the head node.
    resources: {"CPU": 0}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

  ray.worker.default:
    # min == max: a fixed pool of two workers.
    min_workers: 2
    max_workers: 2
    resources: {}
    node_config:
      KeyName: ci-github-actions-ray-cluster-key
      InstanceType: i3.2xlarge
      ImageId: ami-04dd23e62ed049936
      IamInstanceProfile:
        Name: ray-autoscaler-v1

setup_commands:
  # Mount drive
  # Only format and mount /dev/nvme0n1 when nothing is mounted on /tmp yet, so
  # re-running the setup commands does not reformat a drive that is in use.
  - |
    findmnt /tmp 1> /dev/null
    code=$?
    if [ $code -ne 0 ]; then
      sudo mkfs.ext4 /dev/nvme0n1
      sudo mount -t ext4 /dev/nvme0n1 /tmp
      sudo chmod 777 /tmp
    fi
  # Install dependencies
  # GitHub Actions workflow will replace all parameters between `{{...}}` with the
  # actual values as determined dynamically during runtime of the actual workflow.
  - sudo snap install aws-cli --classic
  - curl -LsSf https://astral.sh/uv/install.sh | sh
  - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
  - source ~/.bashrc
  - uv python install {{PYTHON_VERSION}}
  - uv python pin {{PYTHON_VERSION}}
  - uv v
  - echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc
  - source .venv/bin/activate
  # `ray[default]` is quoted so the shell cannot treat the brackets as a glob.
  # `{{DAFT_VERSION}}` becomes either `==<version>` or the empty string.
  - uv pip install pip 'ray[default]' py-spy getdaft{{DAFT_VERSION}}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
name: Build a Daft commit and store the outputted wheel in AWS S3 | ||
name: build-commit | ||
|
||
on: | ||
workflow_dispatch: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Manually triggered workflow that spins up a Ray cluster from
# `.github/assets/benchmarking_ray_config.yaml`, submits a job to it, and tears
# the cluster down again. The tear-down step is marked `if: always()` so it is
# attempted even when an earlier step fails.
name: run-cluster

on:
  workflow_dispatch:
    inputs:
      daft_version:
        type: string
        description: The wheel artifact to use
        required: false
      python_version:
        type: string
        description: The version of python to use
        required: false
        default: "3.9"

jobs:
  run-command:
    runs-on: [self-hosted, linux, x64, ci-dev]
    timeout-minutes: 15  # Remove for ssh debugging
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-session-name: run-command-workflow
      - name: Install uv, rust, python
        uses: ./.github/actions/install
        with:
          python_version: ${{ inputs.python_version }}
      - name: Setup uv environment
        run: |
          uv v
          source .venv/bin/activate
          uv pip install 'ray[default]' boto3
      - name: Dynamically update ray config file
        # User-supplied inputs are passed through environment variables rather
        # than being spliced into the script text, so a crafted input value
        # cannot inject shell commands.
        env:
          PYTHON_VERSION: ${{ inputs.python_version }}
          DAFT_VERSION: ${{ inputs.daft_version }}
        run: |
          # Unique per run attempt, so concurrent or re-run workflows never
          # share a cluster.
          id="ray-ci-run-${{ github.run_id }}_${{ github.run_attempt }}"
          sed -i "s|{{RAY_CLUSTER_NAME}}|$id|g" .github/assets/benchmarking_ray_config.yaml
          sed -i "s|{{PYTHON_VERSION}}|$PYTHON_VERSION|g" .github/assets/benchmarking_ray_config.yaml
          # Pin getdaft only when a version was requested; otherwise drop the
          # placeholder so the latest release is installed.
          if [[ -n "$DAFT_VERSION" ]]; then
            sed -i "s|{{DAFT_VERSION}}|==$DAFT_VERSION|g" .github/assets/benchmarking_ray_config.yaml
          else
            sed -i 's|{{DAFT_VERSION}}||g' .github/assets/benchmarking_ray_config.yaml
          fi
      - name: Download private ssh key
        run: |
          mkdir -p ~/.ssh
          KEY=$(aws secretsmanager get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text)
          # Overwrite instead of append: the runner is self-hosted, so a key
          # file left over from a previous run would otherwise end up with the
          # key concatenated to itself and become unusable.
          echo "$KEY" > ~/.ssh/ci-github-actions-ray-cluster-key.pem
          chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem
      - name: Spin up ray cluster
        run: |
          source .venv/bin/activate
          ray up .github/assets/benchmarking_ray_config.yaml -y
      - name: Setup connection to ray cluster
        run: |
          source .venv/bin/activate
          # Runs in the background so the port-forward stays alive for the
          # following steps.
          ray dashboard .github/assets/benchmarking_ray_config.yaml &
          # The forward is established asynchronously; wait (up to ~30s) until
          # the local port accepts connections so the submit step cannot race it.
          for _ in $(seq 1 30); do
            if (exec 3<>/dev/tcp/localhost/8265) 2>/dev/null; then
              exit 0
            fi
            sleep 1
          done
          echo "Timed out waiting for the ray dashboard on localhost:8265" >&2
          exit 1
      - name: Submit job to ray cluster
        run: |
          source .venv/bin/activate
          ray job submit --address http://localhost:8265 -- python -c "print('Hello, world!')"
      - name: Spin down ray cluster
        # Attempt tear-down even if an earlier step failed.
        if: always()
        run: |
          source .venv/bin/activate
          ray down .github/assets/benchmarking_ray_config.yaml -y
This file was deleted.
Oops, something went wrong.