-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pipeline generator plugins #33
Changes from all commits
717b163
e3aac9c
49043e5
72f1fd2
3cdd2b3
ab323d9
ec9e184
3615f75
f1a2975
a315f66
9e52954
9d973dc
a040279
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
from pydantic import BaseModel, Field | ||
from typing import List, Dict, Any, Optional | ||
|
||
from .utils import HF_HOME | ||
|
||
# Keys under which each plugin's configuration is emitted: the dicts returned
# by get_docker_plugin_config / get_kubernetes_plugin_config use these names
# as their single top-level key.
DOCKER_PLUGIN_NAME = "docker#v5.2.0"
KUBERNETES_PLUGIN_NAME = "kubernetes"
|
||
|
||
class DockerPluginConfig(BaseModel):
    """
    Settings handed to the Buildkite Docker plugin for a single step.

    The fields mirror the plugin options documented at:
    https://github.com/buildkite-plugins/docker-buildkite-plugin?tab=readme-ov-file#configuration
    Options whose plugin key contains a hyphen are declared with a pydantic
    alias so that dumping with ``by_alias=True`` yields the plugin's spelling.
    """
    # Empty by default; get_docker_plugin_config always supplies a value.
    image: str = Field(default="")
    always_pull: bool = Field(alias="always-pull", default=True)
    propagate_environment: bool = Field(alias="propagate-environment", default=True)
    # Defaults to "all"; get_docker_plugin_config clears it when no GPU is wanted.
    gpus: Optional[str] = Field(default="all")
    mount_buildkite_agent: Optional[bool] = Field(alias="mount-buildkite-agent", default=False)
    command: List[str] = Field(default_factory=list)
    # NAME=value entries carry an explicit value; bare names are listed without one.
    environment: List[str] = Field(
        default=[
            f"HF_HOME={HF_HOME}",
            "VLLM_USAGE_SOURCE=ci-test",
            "HF_TOKEN",
            "BUILDKITE_ANALYTICS_TOKEN",
        ],
    )
    # Each entry is a "source:target" pair; both defaults map a path onto itself.
    volumes: List[str] = Field(
        default=[
            "/dev/shm:/dev/shm",
            f"{HF_HOME}:{HF_HOME}",
        ],
    )
|
||
|
||
class KubernetesPodContainerConfig(BaseModel):
    """
    Configuration for a container running in a Kubernetes pod.

    ``image``, ``command`` and ``resources`` are required. ``volume_mounts``
    (serialized as ``volumeMounts``) and ``env`` have defaults: the mounts
    cover ``/dev/shm`` and the ``hf-cache`` volume at ``HF_HOME``, and the
    environment sets ``HF_HOME``, ``VLLM_USAGE_SOURCE`` and ``HF_TOKEN``.
    """
    # NOTE(review): per the review discussion on this change, this model is
    # for the A100 node on EKS.
    image: str
    command: List[str]
    resources: Dict[str, Dict[str, int]]
    volume_mounts: List[Dict[str, str]] = Field(
        alias="volumeMounts",
        default=[
            {"name": "devshm", "mountPath": "/dev/shm"},
            {"name": "hf-cache", "mountPath": HF_HOME},
        ],
    )
    # Entry values are typed ``Any`` rather than ``str``: the HF_TOKEN entry
    # nests a ``valueFrom``/``secretKeyRef`` mapping instead of a plain string,
    # so a ``Dict[str, str]`` annotation would reject this very default if it
    # were ever passed in explicitly (e.g. when re-loading a dumped config).
    env: List[Dict[str, Any]] = Field(
        default=[
            {"name": "HF_HOME", "value": HF_HOME},
            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
            {
                "name": "HF_TOKEN",
                "valueFrom": {
                    "secretKeyRef": {
                        "name": "hf-token-secret",
                        "key": "token",
                    }
                },
            },
        ],
    )
|
||
|
||
class KubernetesPodSpec(BaseModel):
    """
    Pod-level settings for a Buildkite step that runs on Kubernetes.

    Only ``containers`` is required. The remaining fields default to the
    ``ci`` priority class, a node selector on the GPU product label, and the
    ``devshm`` / ``hf-cache`` volumes.
    """
    containers: List[KubernetesPodContainerConfig]
    priority_class_name: str = Field(alias="priorityClassName", default="ci")
    # NOTE(review): the origin of this GPU product label is not documented
    # here; confirm it against the target cluster's node labels.
    node_selector: Dict[str, Any] = Field(
        alias="nodeSelector",
        default={"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
    )
    # "devshm" is a memory-medium emptyDir; "hf-cache" is a hostPath at HF_HOME.
    volumes: List[Dict[str, Any]] = Field(
        default=[
            {"name": "devshm", "emptyDir": {"medium": "Memory"}},
            {"name": "hf-cache", "hostPath": {"path": HF_HOME, "type": "Directory"}},
        ],
    )
|
||
|
||
class KubernetesPluginConfig(BaseModel):
    """
    Top-level payload for the Kubernetes plugin in a Buildkite step.

    Wraps a single required pod spec, exposed under the ``podSpec`` alias.
    """
    pod_spec: KubernetesPodSpec = Field(..., alias="podSpec")
|
||
|
||
def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[str], num_gpus: int) -> Dict:
    """
    Build the plugin mapping for running a test command on Kubernetes.

    Args:
        container_image: Image for the pod's single container.
        test_bash_command: Command pieces; they are joined with spaces into
            one string, which becomes the container's only command entry.
        num_gpus: Value placed under the ``nvidia.com/gpu`` resource limit.

    Returns:
        A one-entry dict keyed by ``KUBERNETES_PLUGIN_NAME`` whose value is
        the plugin configuration dumped with alias keys.
    """
    joined_command = " ".join(test_bash_command)
    gpu_resources = {"limits": {"nvidia.com/gpu": num_gpus}}
    container = KubernetesPodContainerConfig(
        image=container_image,
        command=[joined_command],
        resources=gpu_resources,
    )
    plugin_config = KubernetesPluginConfig(
        podSpec=KubernetesPodSpec(containers=[container]),
    )
    return {KUBERNETES_PLUGIN_NAME: plugin_config.dict(by_alias=True)}
|
||
|
||
def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str], no_gpu: bool) -> Dict:
    """
    Build the plugin mapping for running a test command through Docker.

    Args:
        docker_image_path: Image the plugin should run.
        test_bash_command: Command list passed to the plugin unchanged.
        no_gpu: When true, ``gpus`` is set to ``None`` and therefore omitted
            from the result; otherwise the model default is kept.

    Returns:
        A one-entry dict keyed by ``DOCKER_PLUGIN_NAME`` whose value is the
        plugin configuration dumped with alias keys and ``None`` values dropped.
    """
    # Only override ``gpus`` when GPUs are explicitly not wanted.
    gpu_override = {"gpus": None} if no_gpu else {}
    plugin_config = DockerPluginConfig(
        image=docker_image_path,
        command=test_bash_command,
        **gpu_override,
    )
    dumped = plugin_config.dict(exclude_none=True, by_alias=True)
    return {DOCKER_PLUGIN_NAME: dumped}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import pytest | ||
import sys | ||
|
||
from scripts.pipeline_generator.plugin import ( | ||
get_kubernetes_plugin_config, | ||
get_docker_plugin_config, | ||
DOCKER_PLUGIN_NAME, | ||
KUBERNETES_PLUGIN_NAME, | ||
) | ||
|
||
|
||
def test_get_kubernetes_plugin_config():
    """Check the complete plugin mapping produced for a one-GPU Kubernetes step."""
    image = "test_image:latest"
    bash_command = ["echo", "Hello, Kubernetes!"]
    gpu_count = 1
    hf_cache_dir = "/root/.cache/huggingface"

    # The command pieces are expected to arrive as one space-joined string.
    expected_container = {
        "image": image,
        "command": ["echo Hello, Kubernetes!"],
        "resources": {"limits": {"nvidia.com/gpu": gpu_count}},
        "volumeMounts": [
            {"name": "devshm", "mountPath": "/dev/shm"},
            {"name": "hf-cache", "mountPath": hf_cache_dir},
        ],
        "env": [
            {"name": "HF_HOME", "value": hf_cache_dir},
            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
            {
                "name": "HF_TOKEN",
                "valueFrom": {
                    "secretKeyRef": {"name": "hf-token-secret", "key": "token"},
                },
            },
        ],
    }
    expected_pod_spec = {
        "containers": [expected_container],
        "priorityClassName": "ci",
        "nodeSelector": {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
        "volumes": [
            {"name": "devshm", "emptyDir": {"medium": "Memory"}},
            {"name": "hf-cache", "hostPath": {"path": hf_cache_dir, "type": "Directory"}},
        ],
    }

    actual = get_kubernetes_plugin_config(image, bash_command, gpu_count)
    assert actual == {KUBERNETES_PLUGIN_NAME: {"podSpec": expected_pod_spec}}
|
||
|
||
def _expected_docker_config(image, command, with_gpus):
    """Return the plugin mapping expected for *image* and *command*.

    The ``gpus`` key is present (with value ``"all"``) only when *with_gpus*
    is true; every other key is the same for both cases.
    """
    settings = {
        "image": image,
        "always-pull": True,
        "propagate-environment": True,
        "command": command,
        "environment": [
            "HF_HOME=/root/.cache/huggingface",
            "VLLM_USAGE_SOURCE=ci-test",
            "HF_TOKEN",
            "BUILDKITE_ANALYTICS_TOKEN",
        ],
        "mount-buildkite-agent": False,
        "volumes": [
            "/dev/shm:/dev/shm",
            "/root/.cache/huggingface:/root/.cache/huggingface",
        ],
    }
    if with_gpus:
        settings["gpus"] = "all"
    return {DOCKER_PLUGIN_NAME: settings}


@pytest.mark.parametrize(
    "docker_image_path, test_bash_command, no_gpu, expected_config",
    [
        # GPU step: the default "gpus" value is kept.
        (
            "test_image:latest",
            ["bash", "-c", "echo A;\npytest -v -s a.py"],
            False,
            _expected_docker_config(
                "test_image:latest",
                ["bash", "-c", "echo A;\npytest -v -s a.py"],
                True,
            ),
        ),
        # CPU step: "gpus" must be absent from the output.
        (
            "cpu_image:latest",
            ["bash", "-c", "echo B;\npytest -v -s b.py"],
            True,
            _expected_docker_config(
                "cpu_image:latest",
                ["bash", "-c", "echo B;\npytest -v -s b.py"],
                False,
            ),
        ),
    ],
)
def test_get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu, expected_config):
    """Check the Docker plugin mapping with and without GPU access."""
    actual = get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu)
    assert actual == expected_config
|
||
|
||
# Running this file directly runs its tests verbosely and exits with
# pytest's return code.
if __name__ == "__main__":
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add some docs/comments, like for everything?
Like maybe some link pointers to where the golden spec of these structs is defined?