Skip to content

Commit

Permalink
Merge branch 'main' of github.com:vllm-project/buildkite-ci into main
Browse files Browse the repository at this point in the history
  • Loading branch information
khluu committed Dec 3, 2024
2 parents e07a900 + 807266e commit 4f70d03
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
14 changes: 14 additions & 0 deletions scripts/test-template-aws.j2
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
{% if branch == "main" %}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %}
{% endif %}
{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
{% set default_working_dir = "/vllm-workspace/tests" %}
{% set hf_home = "/root/.cache/huggingface" %}
Expand Down Expand Up @@ -185,6 +188,8 @@ steps:
limit: 2
- exit_status: -10 # Agent was lost
limit: 2
- exit_status: 1 # Machine occasionally fails
limit: 2
agents:
queue: amd-cpu

Expand All @@ -204,6 +209,15 @@ steps:
{% else %}
soft_fail: false
{% endif %}
retry:
automatic:
- exit_status: -1 # Agent was lost
limit: 2
- exit_status: -10 # Agent was lost
limit: 2
- exit_status: 1 # Machine occasionally fails
limit: 2

{% endif %}
{% endfor %}

Expand Down
14 changes: 7 additions & 7 deletions scripts/test-template-fastcheck.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ steps:
- label: ":docker: build image"
key: image-build
agents:
queue: cpu_queue
queue: cpu_queue_premerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker build --build-arg max_jobs=16 --build-arg buildkite_commit=$BUILDKITE_COMMIT --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
Expand Down Expand Up @@ -38,9 +38,9 @@ steps:
depends_on: image-build
agents:
{% if step.label == "Documentation Build" %}
queue: small_cpu_queue
queue: small_cpu_queue_premerge
{% elif step.no_gpu %}
queue: cpu_queue
queue: cpu_queue_premerge
{% elif step.num_gpus == 2 or step.num_gpus == 4 %}
queue: gpu_4_queue
{% else %}
Expand Down Expand Up @@ -91,9 +91,9 @@ steps:
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") }}
agents:
{% if step.label == "Documentation Build" %}
queue: small_cpu_queue
queue: small_cpu_queue_premerge
{% elif step.no_gpu %}
queue: cpu_queue
queue: cpu_queue_premerge
{% elif step.num_gpus == 2 or step.num_gpus == 4 %}
queue: gpu_4_queue
{% else %}
Expand Down Expand Up @@ -157,7 +157,7 @@ steps:
- label: "{{ step.label }}"
priority: 10000
agents:
queue: a100-queue
queue: a100_queue
soft_fail: {{ step.soft_fail or false }}
{% if step.parallelism %}
parallelism: {{ step.parallelism }}
Expand Down Expand Up @@ -212,7 +212,7 @@ steps:
- label: "TPU Test"
depends_on: ~
agents:
queue: tpu
queue: tpu_queue
commands:
- if [[ -f ".buildkite/run-tpu-test.sh" ]]; then bash .buildkite/run-tpu-test.sh; fi
- yes | docker system prune -a

0 comments on commit 4f70d03

Please sign in to comment.