up

Signed-off-by: Rui Qiao <[email protected]>
vllm-project · Nov 25, 2024 · eb8ae49
1 parent d9258e3
commit eb8ae49
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 18 deletions.
diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
@@ -87,16 +87,15 @@ def test_models(
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model, distributed_executor_backend, attention_backend, "
-    "test_suite",
-    [
-        # ("facebook/opt-125m", "ray", "", "L4"),
-        # ("facebook/opt-125m", "mp", "", "L4"),
+    "test_suite", [
+        ("facebook/opt-125m", "ray", "", "L4"),
+        ("facebook/opt-125m", "mp", "", "L4"),
         ("meta-llama/Llama-2-7b-hf", "ray", "", "L4"),
-        # ("meta-llama/Llama-2-7b-hf", "mp", "", "L4"),
-        # ("facebook/opt-125m", "ray", "", "A100"),
-        # ("facebook/opt-125m", "mp", "", "A100"),
-        # ("facebook/opt-125m", "mp", "FLASHINFER", "A100"),
-        # ("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100"),
+        ("meta-llama/Llama-2-7b-hf", "mp", "", "L4"),
+        ("facebook/opt-125m", "ray", "", "A100"),
+        ("facebook/opt-125m", "mp", "", "A100"),
+        ("facebook/opt-125m", "mp", "FLASHINFER", "A100"),
+        ("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100"),
     ])
 def test_models_distributed(
     hf_runner,

diff --git a/vllm/envs.py b/vllm/envs.py
@@ -46,7 +46,7 @@
     VLLM_USE_RAY_SPMD_WORKER: bool = False
     VLLM_USE_RAY_COMPILED_DAG: bool = False
     VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: bool = True
-    VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP: bool = True
+    VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = True
     VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
     VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
     VLLM_IMAGE_FETCH_TIMEOUT: int = 5
@@ -345,8 +345,8 @@ def get_default_config_root():
     # If the env var is set, it enables GPU communication overlap in
     # Ray's compiled DAG. This flag is ignored if
     # VLLM_USE_RAY_COMPILED_DAG is not set.
-    "VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP":
-    lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP", "1"))
+    "VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM":
+    lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "1"))
                  ),
 
     # Use dedicated multiprocess context for workers.

diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
@@ -425,9 +425,7 @@ def _check_ray_adag_installation(self):
         required_version = version.parse("2.39")
         current_version = version.parse(
             pkg_resources.get_distribution("ray").version)
-        # TODO: check the minimum version as opposed to the exact version
-        # once ray compiled graph is more stable
-        if current_version != required_version:
+        if current_version < required_version:
             raise ValueError(f"Ray version {required_version} is "
                              f"required, but found {current_version}")
 
@@ -453,8 +451,8 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
 
         logger.info("VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL = %s",
                     envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL)
-        logger.info("VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP = %s",
-                    envs.VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP)
+        logger.info("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s",
+                    envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
         with InputNode() as input_data:
             # Example DAG: PP=2, TP=4
             # (ExecuteModelReq, None) -> 0 -> (ExecuteModelReq, IntermediateOutput) -> 4 -> SamplerOutput   # noqa: E501
@@ -493,7 +491,7 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
         return forward_dag.experimental_compile(
             enable_asyncio=enable_asyncio,
             _overlap_gpu_communication=envs.
-            VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP)
+            VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
 
     def __del__(self):
         self.shutdown()