-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
# Overview - When running a job on a Ray cluster using GHA, we want to be able to configure the cluster. - This is achieved via "cluster profiles": a fixed set of configurations (e.g., `medium-x86`, `debug_xs-x86`) that end users can select from. - A profile takes care of all of the configuration without leaking the internals of the Ray configuration story. ## Available Options - `medium-x86` - `debug_xs-x86` I plan on adding more in the future. For now, this should suffice.
- Loading branch information
Raunak Bhagat
authored
Dec 1, 2024
1 parent
8652eba
commit b5f60e0
Showing
3 changed files
with
127 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import sys | ||
from argparse import ArgumentParser | ||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
# Placeholder tokens that the script searches for verbatim in the text read
# from stdin. Each one is a literal backslash followed by `{{...}}`.

# Tokens filled from individual command-line flags.
CLUSTER_NAME_PLACEHOLDER = "\\{{CLUSTER_NAME}}"
DAFT_VERSION_PLACEHOLDER = "\\{{DAFT_VERSION}}"
PYTHON_VERSION_PLACEHOLDER = "\\{{PYTHON_VERSION}}"

# Tokens filled from the fields of the selected cluster profile.
CLUSTER_PROFILE__NODE_COUNT = "\\{{CLUSTER_PROFILE/node_count}}"
CLUSTER_PROFILE__INSTANCE_TYPE = "\\{{CLUSTER_PROFILE/instance_type}}"
CLUSTER_PROFILE__IMAGE_ID = "\\{{CLUSTER_PROFILE/image_id}}"
CLUSTER_PROFILE__SSH_USER = "\\{{CLUSTER_PROFILE/ssh_user}}"
CLUSTER_PROFILE__VOLUME_MOUNT = "\\{{CLUSTER_PROFILE/volume_mount}}"
|
||
|
||
@dataclass
class Profile:
    """Values substituted for the `CLUSTER_PROFILE/...` placeholders.

    Each field supplies the replacement text for the placeholder of the
    same name; `node_count` is converted with `str()` before substitution.
    """

    # Replacement for the `node_count` placeholder.
    node_count: int
    # Replacement for the `instance_type` placeholder (e.g. "t3.large").
    instance_type: str
    # Replacement for the `image_id` placeholder.
    image_id: str
    # Replacement for the `ssh_user` placeholder.
    ssh_user: str
    # Replacement for the `volume_mount` placeholder. When left as None (or
    # empty), the script substitutes a harmless `echo` command instead.
    volume_mount: Optional[str] = None
|
||
|
||
# Registry of selectable cluster profiles, keyed by the name passed via
# `--cluster-profile`. A value of None marks a name that is known but not yet
# implemented; the script rejects such a name with a dedicated error.
profiles: dict[str, Optional[Profile]] = {
    "debug_xs-x86": Profile(
        instance_type="t3.large",
        image_id="ami-04dd23e62ed049936",
        node_count=1,
        ssh_user="ubuntu",
    ),
    "medium-x86": Profile(
        instance_type="i3.2xlarge",
        image_id="ami-04dd23e62ed049936",
        node_count=4,
        ssh_user="ubuntu",
        # Shell snippet: when `findmnt` reports that /tmp is not a mount
        # point, format /dev/nvme0n1 as ext4, mount it on /tmp and make it
        # world-writable.
        # NOTE(review): the leading " |" presumably turns the substituted
        # text into a YAML block scalar, in which case the lines below need
        # leading indentation matching the template. That indentation is not
        # recoverable from this view -- confirm against the template file.
        volume_mount=""" |
findmnt /tmp 1> /dev/null
code=$?
if [ $code -ne 0 ]; then
sudo mkfs.ext4 /dev/nvme0n1
sudo mount -t ext4 /dev/nvme0n1 /tmp
sudo chmod 777 /tmp
fi""",
    ),
}
|
||
|
||
# Script entry point: read a template from stdin, substitute the placeholder
# tokens according to the command-line flags, and print the result to stdout.
if __name__ == "__main__":
    # Parse the flags before touching stdin so that `--help` or an invalid
    # flag exits immediately instead of first blocking on input.
    parser = ArgumentParser()
    parser.add_argument("--cluster-name")
    parser.add_argument("--daft-version")
    parser.add_argument("--python-version")
    parser.add_argument("--cluster-profile")
    args = parser.parse_args()

    content = sys.stdin.read()

    # The cluster-name token is left untouched when the flag is omitted.
    if args.cluster_name:
        content = content.replace(CLUSTER_NAME_PLACEHOLDER, args.cluster_name)

    # The version token is always consumed: it becomes an `==<version>` pin
    # when a version is given, and is removed entirely otherwise.
    if args.daft_version:
        content = content.replace(DAFT_VERSION_PLACEHOLDER, f"=={args.daft_version}")
    else:
        content = content.replace(DAFT_VERSION_PLACEHOLDER, "")

    # The python-version token is left untouched when the flag is omitted.
    if args.python_version:
        content = content.replace(PYTHON_VERSION_PLACEHOLDER, args.python_version)

    # Profile tokens are only substituted when a profile name is supplied.
    if cluster_profile := args.cluster_profile:
        if cluster_profile not in profiles:
            raise Exception(f'Cluster profile "{cluster_profile}" not found')

        profile = profiles[cluster_profile]
        # A None entry is a known name without an implementation yet.
        if profile is None:
            raise Exception(f'Cluster profile "{cluster_profile}" not yet implemented')

        content = content.replace(CLUSTER_PROFILE__NODE_COUNT, str(profile.node_count))
        content = content.replace(CLUSTER_PROFILE__INSTANCE_TYPE, profile.instance_type)
        content = content.replace(CLUSTER_PROFILE__IMAGE_ID, profile.image_id)
        content = content.replace(CLUSTER_PROFILE__SSH_USER, profile.ssh_user)
        if profile.volume_mount:
            content = content.replace(CLUSTER_PROFILE__VOLUME_MOUNT, profile.volume_mount)
        else:
            # No mount snippet for this profile: substitute a no-op command.
            content = content.replace(CLUSTER_PROFILE__VOLUME_MOUNT, "echo 'Nothing to mount; skipping'")

    print(content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters