Skip to content

Commit

Permalink
Pipeline generator utils (#31)
Browse files Browse the repository at this point in the history
* p

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

* add req

Signed-off-by: kevin <[email protected]>

* p

Signed-off-by: kevin <[email protected]>

---------

Signed-off-by: kevin <[email protected]>
  • Loading branch information
khluu authored Sep 20, 2024
1 parent 99ea02d commit 8272857
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 1 deletion.
10 changes: 9 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,12 @@ terraform.rc

.env

.vscode/
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

.cache
*.log
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
click==8.1.7
pydantic==2.9.2
Empty file added scripts/__init__.py
Empty file.
Empty file.
62 changes: 62 additions & 0 deletions scripts/pipeline_generator/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import enum
from typing import Optional, List

# Constants
# Hugging Face cache directory (NOTE(review): presumably the path inside the CI container — confirm).
HF_HOME = "/root/.cache/huggingface"
# Fallback directory the command builders in this module use when a step gives no working dir.
DEFAULT_WORKING_DIR = "/vllm-workspace/tests"
# Registry host/alias and the repository path derived from it.
VLLM_ECR_URL = "public.ecr.aws/q9t5s3a7"
VLLM_ECR_REPO = f"{VLLM_ECR_URL}/vllm-ci-test-repo"
# NOTE(review): presumably the image repository used for AMD/ROCm builds — confirm against callers.
AMD_REPO = "rocm/vllm-ci"
# The gpu_type value that get_agent_queue() routes to AgentQueue.A100.
A100_GPU = "a100"

# File paths
# NOTE(review): all four look relative to the repository root — confirm with the code that opens them.
TEST_PATH = ".buildkite/test-pipeline.yaml"
EXTERNAL_HARDWARE_TEST_PATH = ".buildkite/external-tests.yaml"
PIPELINE_FILE_PATH = ".buildkite/pipeline.yaml"
# Script placed first in the command line built by get_multi_node_test_command().
MULTI_NODE_TEST_SCRIPT = ".buildkite/run-multi-node-test.sh"

# Not referenced anywhere in this module; NOTE(review): presumably step names that need a manual
# unblock before running — confirm against the pipeline generator.
STEPS_TO_BLOCK = []


@enum.unique
class AgentQueue(str, enum.Enum):
    """Names of the agent queues a pipeline step can be routed to.

    The ``str`` mix-in makes every member an actual string, so a member
    compares equal to its raw queue name and can be used wherever a plain
    string is expected. ``enum.unique`` rejects two members sharing the same
    queue name, which would otherwise silently turn the second into an alias
    of the first.

    NOTE(review): member names suggest the hardware behind each queue
    (e.g. ``AWS_4xL4`` = four L4 GPUs on AWS) — confirm against the agent
    configuration.
    """

    AWS_CPU = "cpu_queue"
    AWS_SMALL_CPU = "small_cpu_queue"
    AWS_1xL4 = "gpu_1_queue"
    AWS_4xL4 = "gpu_4_queue"
    A100 = "a100-queue"
    AMD_GPU = "amd"
    AMD_CPU = "amd-cpu"


def get_agent_queue(no_gpu: Optional[bool], gpu_type: Optional[str], num_gpus: Optional[int]) -> AgentQueue:
    """Pick the agent queue a step should run on.

    Rules are applied in order; the first match wins:

    1. ``no_gpu`` truthy            -> ``AgentQueue.AWS_SMALL_CPU``
    2. ``gpu_type == A100_GPU``     -> ``AgentQueue.A100``
    3. ``num_gpus`` unset, 0 or 1   -> ``AgentQueue.AWS_1xL4``
    4. anything else                -> ``AgentQueue.AWS_4xL4``

    Args:
        no_gpu: Truthy when the step does not need a GPU.
        gpu_type: Requested GPU type, or ``None`` for the default GPU queues.
        num_gpus: Number of GPUs requested, or ``None`` when unspecified.

    Returns:
        The queue matching the first applicable rule above.
    """
    if no_gpu:
        return AgentQueue.AWS_SMALL_CPU
    if gpu_type == A100_GPU:
        return AgentQueue.A100
    # A step that does not state a GPU count must not fall through to the
    # multi-GPU queue: treat "unspecified" the same as a single GPU.
    if not num_gpus or num_gpus == 1:
        return AgentQueue.AWS_1xL4
    return AgentQueue.AWS_4xL4


def get_full_test_command(test_commands: List[str], step_working_dir: Optional[str]) -> str:
    """Build a shell snippet that enters the working directory, then runs the commands.

    The result is a multi-line string: a ``cd`` into the working directory
    followed by each test command, with consecutive entries separated by
    ``";\\n"``.

    Args:
        test_commands: Commands to run, in order.
        step_working_dir: Directory to ``cd`` into. ``None`` or an empty
            string falls back to ``DEFAULT_WORKING_DIR``.

    Returns:
        The combined command string.
    """
    working_dir = step_working_dir or DEFAULT_WORKING_DIR
    joined_commands = ";\n".join(test_commands)
    return f"cd {working_dir};\n{joined_commands}"


def get_multi_node_test_command(
    test_commands: List[str],
    working_dir: Optional[str],
    num_nodes: int,
    num_gpus: int,
    docker_image_path: str
) -> str:
    """Build the command line that invokes the multi-node test script.

    The result is a single space-separated string laid out as: script path,
    working directory, node count, GPU count, docker image path, then every
    test command as its own single-quoted argument.

    Args:
        test_commands: Commands to hand to the script, one argument each.
        working_dir: Directory passed to the script. ``None`` or an empty
            string falls back to ``DEFAULT_WORKING_DIR``.
        num_nodes: Node count passed to the script.
        num_gpus: GPU count passed to the script.
        docker_image_path: Docker image reference passed to the script.

    Returns:
        The assembled command line.
    """
    # A single quote cannot be escaped inside a single-quoted shell string, so
    # each embedded quote closes the string, emits an escaped quote, and
    # reopens it. Commands without quotes come out exactly as before.
    quoted_commands = [
        "'" + command.replace("'", "'\\''") + "'" for command in test_commands
    ]
    multi_node_command = [
        MULTI_NODE_TEST_SCRIPT,
        working_dir or DEFAULT_WORKING_DIR,
        str(num_nodes),
        str(num_gpus),
        docker_image_path,
        *quoted_commands,
    ]
    return " ".join(map(str, multi_node_command))
Empty file added scripts/tests/__init__.py
Empty file.
Empty file.
66 changes: 66 additions & 0 deletions scripts/tests/pipeline_generator/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest
import sys
from typing import List

from scripts.pipeline_generator.utils import (
get_agent_queue,
get_full_test_command,
get_multi_node_test_command,
AgentQueue,
MULTI_NODE_TEST_SCRIPT,
)


@pytest.mark.parametrize(
    ("no_gpu", "gpu_type", "num_gpus", "expected_result"),
    [
        (True, None, None, AgentQueue.AWS_SMALL_CPU),
        (False, "a100", None, AgentQueue.A100),
        (False, None, 1, AgentQueue.AWS_1xL4),
        (False, None, 4, AgentQueue.AWS_4xL4),
    ],
)
def test_get_agent_queue(no_gpu: bool, gpu_type: str, num_gpus: int, expected_result: AgentQueue):
    """Each (no_gpu, gpu_type, num_gpus) combination maps to the expected queue."""
    selected_queue = get_agent_queue(no_gpu, gpu_type, num_gpus)
    assert selected_queue == expected_result


@pytest.mark.parametrize(
    ("test_commands", "step_working_dir", "expected_result"),
    [
        (["echo 'hello'"], None, "cd /vllm-workspace/tests;\necho 'hello'"),
        (["echo 'hello'"], "/vllm-workspace/tests", "cd /vllm-workspace/tests;\necho 'hello'"),
        (["echo 'hello1'", "echo 'hello2'"], None, "cd /vllm-workspace/tests;\necho 'hello1';\necho 'hello2'"),
    ],
)
def test_get_full_test_command(test_commands: List[str], step_working_dir: str, expected_result: str):
    """The built command starts with a cd into the (default or given) dir, then lists the commands."""
    full_command = get_full_test_command(test_commands, step_working_dir)
    assert full_command == expected_result


def test_get_multi_node_test_command():
    """The multi-node command is the script, its fixed arguments, then each command single-quoted."""
    test_commands = [
        (
            "distributed/test_same_node.py;"
            "pytest -v -s distributed/test_multi_node_assignment.py;"
            "pytest -v -s distributed/test_pipeline_parallel.py"
        ),
        "distributed/test_same_node.py",
    ]
    working_dir = "/vllm-workspace/tests"
    num_nodes = 2
    num_gpus = 4
    docker_image_path = "ecr-path/vllm-ci-test-repo:latest"

    # Fixed leading arguments first, then one quoted argument per test command.
    expected_parts = [
        MULTI_NODE_TEST_SCRIPT,
        working_dir,
        str(num_nodes),
        str(num_gpus),
        docker_image_path,
    ]
    expected_parts.extend(f"'{command}'" for command in test_commands)
    expected_result = " ".join(expected_parts)

    actual_result = get_multi_node_test_command(
        test_commands, working_dir, num_nodes, num_gpus, docker_image_path
    )
    assert actual_result == expected_result


if __name__ == "__main__":
    # Running this file directly runs only its own tests, verbosely, and
    # hands pytest's exit status back to the shell.
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)

0 comments on commit 8272857

Please sign in to comment.