Skip to content

Commit

Permalink
up
Browse files Browse the repository at this point in the history
Signed-off-by: Rui Qiao <[email protected]>
  • Loading branch information
Rui Qiao committed Nov 25, 2024
1 parent d9258e3 commit eb8ae49
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 18 deletions.
17 changes: 8 additions & 9 deletions tests/basic_correctness/test_basic_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,15 @@ def test_models(
@multi_gpu_test(num_gpus=2)
@pytest.mark.parametrize(
"model, distributed_executor_backend, attention_backend, "
"test_suite",
[
# ("facebook/opt-125m", "ray", "", "L4"),
# ("facebook/opt-125m", "mp", "", "L4"),
"test_suite", [
("facebook/opt-125m", "ray", "", "L4"),
("facebook/opt-125m", "mp", "", "L4"),
("meta-llama/Llama-2-7b-hf", "ray", "", "L4"),
# ("meta-llama/Llama-2-7b-hf", "mp", "", "L4"),
# ("facebook/opt-125m", "ray", "", "A100"),
# ("facebook/opt-125m", "mp", "", "A100"),
# ("facebook/opt-125m", "mp", "FLASHINFER", "A100"),
# ("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100"),
("meta-llama/Llama-2-7b-hf", "mp", "", "L4"),
("facebook/opt-125m", "ray", "", "A100"),
("facebook/opt-125m", "mp", "", "A100"),
("facebook/opt-125m", "mp", "FLASHINFER", "A100"),
("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100"),
])
def test_models_distributed(
hf_runner,
Expand Down
6 changes: 3 additions & 3 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
VLLM_USE_RAY_SPMD_WORKER: bool = False
VLLM_USE_RAY_COMPILED_DAG: bool = False
VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: bool = True
VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP: bool = True
VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = True
VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
Expand Down Expand Up @@ -345,8 +345,8 @@ def get_default_config_root():
# If the env var is set to 1 (the default), it enables GPU communication
# overlap in Ray's compiled DAG; set it to 0 to disable the overlap.
# This flag is ignored if VLLM_USE_RAY_COMPILED_DAG is not set.
"VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP":
lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP", "1"))
"VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM":
lambda: bool(int(os.getenv("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM", "1"))
),

# Use dedicated multiprocess context for workers.
Expand Down
10 changes: 4 additions & 6 deletions vllm/executor/ray_gpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,9 +425,7 @@ def _check_ray_adag_installation(self):
required_version = version.parse("2.39")
current_version = version.parse(
pkg_resources.get_distribution("ray").version)
# TODO: check the minimum version as opposed to the exact version
# once ray compiled graph is more stable
if current_version != required_version:
if current_version < required_version:
raise ValueError(f"Ray version {required_version} is "
f"required, but found {current_version}")

Expand All @@ -453,8 +451,8 @@ def _compiled_ray_dag(self, enable_asyncio: bool):

logger.info("VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL = %s",
envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL)
logger.info("VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP = %s",
envs.VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP)
logger.info("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s",
envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
with InputNode() as input_data:
# Example DAG: PP=2, TP=4
# (ExecuteModelReq, None) -> 0 -> (ExecuteModelReq, IntermediateOutput) -> 4 -> SamplerOutput # noqa: E501
Expand Down Expand Up @@ -493,7 +491,7 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
return forward_dag.experimental_compile(
enable_asyncio=enable_asyncio,
_overlap_gpu_communication=envs.
VLLM_USE_RAY_COMPILED_DAG_COMM_OVERLAP)
VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)

def __del__(self):
self.shutdown()
Expand Down

0 comments on commit eb8ae49

Please sign in to comment.