From be1ff8f0d2c77a67704e19818a049bbde1c63acc Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Sat, 23 Nov 2024 15:30:52 +0000 Subject: [PATCH 1/2] Done Signed-off-by: Jee Jee Li --- .buildkite/test-pipeline.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c436d2b48d20f..a87f2f8dc383c 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -52,6 +52,7 @@ steps: - tests/worker - tests/test_lazy_torch_compile.py commands: + - echo "Running test_lazy_torch_compile.py..." # print running script to enhance CI log readability - python3 test_lazy_torch_compile.py - pytest -v -s mq_llm_engine # MQLLMEngine - pytest -v -s async_engine # AsyncLLMEngine @@ -182,15 +183,25 @@ steps: - examples/ commands: - pip install awscli tensorizer # for llava example and tensorizer test + - echo "Running offline_inference.py..." # print running script to enhance CI log readability - python3 offline_inference.py + - echo "Running cpu_offload.py..." - python3 cpu_offload.py + - echo "Running offline_inference_chat.py..." - python3 offline_inference_chat.py + - echo "Running offline_inference_with_prefix.py..." - python3 offline_inference_with_prefix.py + - echo "Running llm_engine_example.py..." - python3 llm_engine_example.py + - echo "Running offline_inference_vision_language.py..." - python3 offline_inference_vision_language.py + - echo "Running offline_inference_vision_language_multi_image.py..." - python3 offline_inference_vision_language_multi_image.py + - echo "Running tensorize_vllm_model.py..." - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors + - echo "Running offline_inference_encoder_decoder.py..." - python3 offline_inference_encoder_decoder.py + - echo "Running offline_profile.py..." - python3 offline_profile.py --model facebook/opt-125m - label: Prefix Caching Test # 9min From 0a21c9b38dc1742b6dc7992151c0130972c840dd Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Sat, 23 Nov 2024 16:02:41 +0000 Subject: [PATCH 2/2] Single quotes done Signed-off-by: Jee Jee Li --- .buildkite/test-pipeline.yaml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a87f2f8dc383c..dfa76ecc5e83e 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -52,7 +52,7 @@ steps: - tests/worker - tests/test_lazy_torch_compile.py commands: - - echo "Running test_lazy_torch_compile.py..." # print running script to enhance CI log readability + - echo 'Running test_lazy_torch_compile.py...' # print running script to enhance CI log readability - python3 test_lazy_torch_compile.py - pytest -v -s mq_llm_engine # MQLLMEngine - pytest -v -s async_engine # AsyncLLMEngine @@ -183,25 +183,25 @@ steps: - examples/ commands: - pip install awscli tensorizer # for llava example and tensorizer test - - echo "Running offline_inference.py..." # print running script to enhance CI log readability + - echo 'Running offline_inference.py...' # print running script to enhance CI log readability - python3 offline_inference.py - - echo "Running cpu_offload.py..." + - echo 'Running cpu_offload.py...' - python3 cpu_offload.py - - echo "Running offline_inference_chat.py..." + - echo 'Running offline_inference_chat.py...' - python3 offline_inference_chat.py - - echo "Running offline_inference_with_prefix.py..." + - echo 'Running offline_inference_with_prefix.py...' - python3 offline_inference_with_prefix.py - - echo "Running llm_engine_example.py..." + - echo 'Running llm_engine_example.py...' - python3 llm_engine_example.py - - echo "Running offline_inference_vision_language.py..." + - echo 'Running offline_inference_vision_language.py...' - python3 offline_inference_vision_language.py - - echo "Running offline_inference_vision_language_multi_image.py..." + - echo 'Running offline_inference_vision_language_multi_image.py...' - python3 offline_inference_vision_language_multi_image.py - - echo "Running tensorize_vllm_model.py..." + - echo 'Running tensorize_vllm_model.py...' - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors - - echo "Running offline_inference_encoder_decoder.py..." + - echo 'Running offline_inference_encoder_decoder.py...' - python3 offline_inference_encoder_decoder.py - - echo "Running offline_profile.py..." + - echo 'Running offline_profile.py...' - python3 offline_profile.py --model facebook/opt-125m - label: Prefix Caching Test # 9min