Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline generator plugins #33

Merged
merged 13 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,12 @@ terraform.rc

.env

.vscode/
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

.cache
*.log
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
click==8.1.7
pydantic==2.9.2
Empty file added scripts/__init__.py
Empty file.
Empty file.
90 changes: 90 additions & 0 deletions scripts/pipeline_generator/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional

from .utils import HF_HOME

DOCKER_PLUGIN_NAME = "docker#v5.2.0"
KUBERNETES_PLUGIN_NAME = "kubernetes"

class DockerPluginConfig(BaseModel):
    """Configuration for the Buildkite Docker plugin (docker#v5.2.0).

    Field aliases use the plugin's kebab-case option names; serialize with
    ``by_alias=True`` to produce a valid plugin stanza.
    Golden spec: https://github.com/buildkite-plugins/docker-buildkite-plugin
    """
    # Container image the step runs in; filled in by the caller.
    image: str = ""
    always_pull: bool = Field(default=True, alias="always-pull")
    # Forward the Buildkite job environment into the container.
    propagate_environment: bool = Field(default=True, alias="propagate-environment")
    # "all" exposes every host GPU; set to None (and dump with
    # exclude_none=True) for CPU-only steps.
    gpus: Optional[str] = "all"
    mount_buildkite_agent: Optional[bool] = Field(default=False, alias="mount-buildkite-agent")
    command: List[str] = Field(default_factory=list)
    # Entries without "=" (HF_TOKEN, BUILDKITE_ANALYTICS_TOKEN) are passed
    # through from the host environment by the plugin.
    # default_factory keeps list defaults consistent with `command` above.
    environment: List[str] = Field(default_factory=lambda: [
        f"HF_HOME={HF_HOME}",
        "VLLM_USAGE_SOURCE=ci-test",
        "HF_TOKEN",
        "BUILDKITE_ANALYTICS_TOKEN"
    ])
    volumes: List[str] = Field(default_factory=lambda: [
        "/dev/shm:/dev/shm",
        f"{HF_HOME}:{HF_HOME}"
    ])

class KubernetesPodContainerConfig(BaseModel):
    """One container entry of a Kubernetes pod spec.

    Used for steps that run on the EKS A100 node instead of the plain
    Docker agents. Aliases follow the Kubernetes camelCase API fields.
    """
    image: str
    command: List[str]
    # Shape: {"limits": {"nvidia.com/gpu": <count>}}
    resources: Dict[str, Dict[str, int]]
    volume_mounts: List[Dict[str, str]] = Field(
        alias="volumeMounts",
        # default_factory avoids a shared mutable default.
        default_factory=lambda: [
            {"name": "devshm", "mountPath": "/dev/shm"},
            {"name": "hf-cache", "mountPath": HF_HOME}
        ]
    )
    # Dict[str, Any]: the HF_TOKEN entry carries a nested "valueFrom" dict,
    # which the previous Dict[str, str] annotation did not admit.
    env: List[Dict[str, Any]] = Field(
        default_factory=lambda: [
            {"name": "HF_HOME", "value": HF_HOME},
            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
            {
                "name": "HF_TOKEN",
                # Pulled from the hf-token-secret Kubernetes secret.
                "valueFrom": {
                    "secretKeyRef": {
                        "name": "hf-token-secret",
                        "key": "token"
                    }
                }
            },
        ],
    )

class KubernetesPodSpec(BaseModel):
    """Kubernetes podSpec consumed by the Buildkite kubernetes plugin."""
    containers: List[KubernetesPodContainerConfig]
    priority_class_name: str = Field(default="ci", alias="priorityClassName")
    # NOTE(review): hard-coded GPU product label pins jobs to A100-SXM4-80GB
    # nodes — confirm it matches the labels on the EKS A100 node group.
    node_selector: Dict[str, Any] = Field(
        # default_factory avoids a shared mutable default.
        default_factory=lambda: {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
        alias="nodeSelector"
    )
    volumes: List[Dict[str, Any]] = Field(
        default_factory=lambda: [
            {"name": "devshm", "emptyDir": {"medium": "Memory"}},
            {"name": "hf-cache", "hostPath": {"path": HF_HOME, "type": "Directory"}}
        ]
    )

class KubernetesPluginConfig(BaseModel):
    """Top-level payload for the kubernetes Buildkite plugin; dump with by_alias=True to emit "podSpec"."""
    pod_spec: KubernetesPodSpec = Field(alias="podSpec")

def get_kubernetes_plugin_config(docker_image_path: str, test_bash_command: List[str], num_gpus: int) -> Dict:
    """Build the kubernetes plugin stanza for a test step.

    Args:
        docker_image_path: Container image the pod runs (any OCI image,
            not necessarily built with Docker).
        test_bash_command: Command tokens; joined into one shell string.
        num_gpus: GPU count requested via the nvidia.com/gpu resource limit.

    Returns:
        One-entry dict mapping the plugin name to its camelCase-keyed config.
    """
    pod_spec = KubernetesPodSpec(
        containers=[
            KubernetesPodContainerConfig(
                image=docker_image_path,
                command=[" ".join(test_bash_command)],
                resources={"limits": {"nvidia.com/gpu": num_gpus}}
            )
        ]
    )
    # model_dump replaces BaseModel.dict(), which is deprecated in pydantic v2
    # (requirements.txt pins pydantic==2.9.2).
    return {KUBERNETES_PLUGIN_NAME: KubernetesPluginConfig(podSpec=pod_spec).model_dump(by_alias=True)}

def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str], no_gpu: bool) -> Dict:
    """Build the docker plugin stanza for a test step.

    Args:
        docker_image_path: Image to run the step in.
        test_bash_command: Command list passed straight to the container.
        no_gpu: When True, drop the ``gpus`` option (CPU-only queues).

    Returns:
        One-entry dict mapping the docker plugin name to its kebab-case config.
    """
    docker_plugin_config = DockerPluginConfig(
        image=docker_image_path,
        command=test_bash_command
    )
    if no_gpu:
        # exclude_none=True below removes the key from the rendered config.
        docker_plugin_config.gpus = None
    # model_dump replaces BaseModel.dict(), which is deprecated in pydantic v2
    # (requirements.txt pins pydantic==2.9.2).
    return {DOCKER_PLUGIN_NAME: docker_plugin_config.model_dump(exclude_none=True, by_alias=True)}
62 changes: 62 additions & 0 deletions scripts/pipeline_generator/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import enum
from typing import Optional, List

# Constants
# Hugging Face cache path, shared between the host and test containers.
HF_HOME = "/root/.cache/huggingface"
# Default directory test commands run from when a step gives none.
DEFAULT_WORKING_DIR = "/vllm-workspace/tests"
# Public ECR registry / repo hosting the CI test images.
VLLM_ECR_URL = "public.ecr.aws/q9t5s3a7"
VLLM_ECR_REPO = f"{VLLM_ECR_URL}/vllm-ci-test-repo"
AMD_REPO = "rocm/vllm-ci"
# gpu_type value that routes a step to the A100 queue (see get_agent_queue).
A100_GPU = "a100"

# File paths
TEST_PATH = ".buildkite/test-pipeline.yaml"
EXTERNAL_HARDWARE_TEST_PATH = ".buildkite/external-tests.yaml"
PIPELINE_FILE_PATH = ".buildkite/pipeline.yaml"
MULTI_NODE_TEST_SCRIPT = ".buildkite/run-multi-node-test.sh"

# NOTE(review): currently empty; presumably names of steps to gate/block in
# the generated pipeline — confirm against the pipeline generator's usage.
STEPS_TO_BLOCK: List[str] = []


class AgentQueue(str, enum.Enum):
    """Buildkite agent queue names, one per hardware flavor."""
    AWS_CPU = "cpu_queue"
    AWS_SMALL_CPU = "small_cpu_queue"
    AWS_1xL4 = "gpu_1_queue"
    AWS_4xL4 = "gpu_4_queue"
    A100 = "a100-queue"
    AMD_GPU = "amd"
    AMD_CPU = "amd-cpu"


def get_agent_queue(no_gpu: Optional[bool], gpu_type: Optional[str], num_gpus: Optional[int]) -> AgentQueue:
    """Pick the Buildkite agent queue matching a step's hardware needs.

    CPU-only steps go to the small CPU queue, A100 steps to the A100
    queue, and everything else to a 1x or 4x L4 GPU queue by GPU count.
    """
    if no_gpu:
        return AgentQueue.AWS_SMALL_CPU
    if gpu_type == A100_GPU:
        return AgentQueue.A100
    if num_gpus == 1:
        return AgentQueue.AWS_1xL4
    return AgentQueue.AWS_4xL4


def get_full_test_command(test_commands: List[str], step_working_dir: str) -> str:
    """Join the step's test commands into one shell line, prefixed with a
    `cd` into the step's working directory (or the default one)."""
    target_dir = step_working_dir if step_working_dir else DEFAULT_WORKING_DIR
    joined_commands = "; ".join(test_commands)
    return f"cd {target_dir}; {joined_commands}"


def get_multi_node_test_command(
    test_commands: List[str],
    working_dir: str,
    num_nodes: int,
    num_gpus: int,
    docker_image_path: str
) -> str:
    """Build the invocation of the multi-node test script.

    Produces: "<script> <working_dir> <num_nodes> <num_gpus> <image>
    '<cmd1>' '<cmd2>' ..." — one single-quoted argument per test command.
    """
    parts: List[str] = [
        MULTI_NODE_TEST_SCRIPT,
        working_dir or DEFAULT_WORKING_DIR,
        str(num_nodes),
        str(num_gpus),
        docker_image_path,
    ]
    parts.extend(f"'{command}'" for command in test_commands)
    return " ".join(parts)
Empty file added scripts/tests/__init__.py
Empty file.
Empty file.
116 changes: 116 additions & 0 deletions scripts/tests/pipeline_generator/test_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import pytest
import sys

from unittest.mock import patch
from scripts.pipeline_generator.plugin import (
get_kubernetes_plugin_config,
get_docker_plugin_config,
DOCKER_PLUGIN_NAME,
KUBERNETES_PLUGIN_NAME,
)

def test_get_kubernetes_plugin_config():
    """The rendered config must carry the full camelCase podSpec, including
    the model defaults (volumeMounts, env, nodeSelector, volumes)."""
    docker_image_path = "test_image:latest"
    test_bash_command = ["echo", "Hello, Kubernetes!"]
    num_gpus = 1

    expected_config = {
        KUBERNETES_PLUGIN_NAME: {
            "podSpec": {
                "containers": [
                    {
                        "image": docker_image_path,
                        # Command tokens are joined into a single shell string.
                        "command": [" ".join(test_bash_command)],
                        "resources": {"limits": {"nvidia.com/gpu": num_gpus}},
                        "volumeMounts": [
                            {"name": "devshm", "mountPath": "/dev/shm"},
                            {"name": "hf-cache", "mountPath": "/root/.cache/huggingface"}
                        ],
                        "env": [
                            {"name": "HF_HOME", "value": "/root/.cache/huggingface"},
                            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
                            {
                                "name": "HF_TOKEN",
                                "valueFrom": {
                                    "secretKeyRef": {
                                        "name": "hf-token-secret",
                                        "key": "token"
                                    }
                                }
                            },
                        ],
                    }
                ],
                "priorityClassName": "ci",
                "nodeSelector": {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
                "volumes": [
                    {"name": "devshm", "emptyDir": {"medium": "Memory"}},
                    {"name": "hf-cache", "hostPath": {"path": "/root/.cache/huggingface", "type": "Directory"}}
                ]
            }
        }
    }

    assert get_kubernetes_plugin_config(docker_image_path, test_bash_command, num_gpus) == expected_config


@pytest.mark.parametrize(
    "docker_image_path, test_bash_command, no_gpu, expected_config",
    [
        # GPU case: "gpus": "all" appears in the rendered config.
        (
            "test_image:latest",
            ["bash", "-c", "echo A", "pytest -v -s a.py"],
            False,
            {
                DOCKER_PLUGIN_NAME: {
                    "image": "test_image:latest",
                    "always-pull": True,
                    "propagate-environment": True,
                    "gpus": "all",
                    "command": ["bash", "-c", "echo A", "pytest -v -s a.py"],
                    "environment": [
                        "HF_HOME=/root/.cache/huggingface",
                        "VLLM_USAGE_SOURCE=ci-test",
                        "HF_TOKEN",
                        "BUILDKITE_ANALYTICS_TOKEN"
                    ],
                    "mount-buildkite-agent": False,
                    "volumes": [
                        "/dev/shm:/dev/shm",
                        "/root/.cache/huggingface:/root/.cache/huggingface"
                    ]
                }
            }
        ),
        # CPU case (no_gpu=True): the "gpus" key is dropped entirely,
        # since it is set to None and dumped with exclude_none=True.
        (
            "cpu_image:latest",
            ["bash", "-c", "echo B", "pytest -v -s b.py"],
            True,
            {
                DOCKER_PLUGIN_NAME: {
                    "image": "cpu_image:latest",
                    "always-pull": True,
                    "propagate-environment": True,
                    "command": ["bash", "-c", "echo B", "pytest -v -s b.py"],
                    "environment": [
                        "HF_HOME=/root/.cache/huggingface",
                        "VLLM_USAGE_SOURCE=ci-test",
                        "HF_TOKEN",
                        "BUILDKITE_ANALYTICS_TOKEN"
                    ],
                    "mount-buildkite-agent": False,
                    "volumes": [
                        "/dev/shm:/dev/shm",
                        "/root/.cache/huggingface:/root/.cache/huggingface"
                    ]
                }
            }
        ),
    ]
)
def test_get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu, expected_config):
    """Rendered docker plugin config must use kebab-case keys and omit 'gpus' when no_gpu is set."""
    assert get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu) == expected_config


if __name__ == "__main__":
    # Allow running this file directly: python test_plugin.py
    sys.exit(pytest.main(["-v", __file__]))
66 changes: 66 additions & 0 deletions scripts/tests/pipeline_generator/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest
import sys
from typing import List

from scripts.pipeline_generator.utils import (
get_agent_queue,
get_full_test_command,
get_multi_node_test_command,
AgentQueue,
MULTI_NODE_TEST_SCRIPT,
)


@pytest.mark.parametrize(
    ("no_gpu", "gpu_type", "num_gpus", "expected_result"),
    [
        # no_gpu wins over everything else.
        (True, None, None, AgentQueue.AWS_SMALL_CPU),
        # A100 routing is keyed on gpu_type, not num_gpus.
        (False, "a100", None, AgentQueue.A100),
        (False, None, 1, AgentQueue.AWS_1xL4),
        (False, None, 4, AgentQueue.AWS_4xL4),
    ],
)
def test_get_agent_queue(no_gpu: bool, gpu_type: str, num_gpus: int, expected_result: AgentQueue):
    """Each hardware combination maps to its dedicated agent queue."""
    assert get_agent_queue(no_gpu, gpu_type, num_gpus) == expected_result


@pytest.mark.parametrize(
    ("test_commands", "step_working_dir", "expected_result"),
    [
        # None working dir falls back to the default working dir.
        (["echo 'hello'"], None, "cd /vllm-workspace/tests; echo 'hello'"),
        (["echo 'hello'"], "/vllm-workspace/tests", "cd /vllm-workspace/tests; echo 'hello'"),
        # Multiple commands are ;-joined after the cd.
        (["echo 'hello1'", "echo 'hello2'"], None, "cd /vllm-workspace/tests; echo 'hello1'; echo 'hello2'"),
    ],
)
def test_get_full_test_command(test_commands: List[str], step_working_dir: str, expected_result: str):
    """Commands are collapsed into one shell line prefixed with a cd."""
    assert get_full_test_command(test_commands, step_working_dir) == expected_result


def test_get_multi_node_test_command():
    """The multi-node command is the script path, dir, counts, image, then
    one single-quoted argument per test command, space-joined."""
    test_commands = [
        (
            "distributed/test_same_node.py;"
            "pytest -v -s distributed/test_multi_node_assignment.py;"
            "pytest -v -s distributed/test_pipeline_parallel.py"
        ),
        "distributed/test_same_node.py",
    ]
    working_dir = "/vllm-workspace/tests"
    num_nodes = 2
    num_gpus = 4
    docker_image_path = "ecr-path/vllm-ci-test-repo:latest"
    expected_multi_node_command = [
        MULTI_NODE_TEST_SCRIPT,
        working_dir,
        num_nodes,
        num_gpus,
        docker_image_path,
        # Each command is wrapped in single quotes for the shell script.
        f"'{test_commands[0]}'",
        f"'{test_commands[1]}'",
    ]
    expected_result = " ".join(map(str, expected_multi_node_command))
    assert get_multi_node_test_command(test_commands, working_dir, num_nodes, num_gpus, docker_image_path) == expected_result


if __name__ == "__main__":
    # Allow running this file directly: python test_utils.py
    sys.exit(pytest.main(["-v", __file__]))