diff --git a/requirements-tpu.txt b/requirements-tpu.txt
index b8f0b15469e77..8ab18b3770ae8 100644
--- a/requirements-tpu.txt
+++ b/requirements-tpu.txt
@@ -18,6 +18,8 @@ ray[default]
 --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
 torch==2.6.0.dev20241126+cpu
 torchvision==0.20.0.dev20241126+cpu
-torch_xla[tpu] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.6.0.dev20241126-cp310-cp310-linux_x86_64.whl
+torch_xla[tpu] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.6.0.dev20241126-cp39-cp39-linux_x86_64.whl ; python_version == "3.9"
+torch_xla[tpu] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.6.0.dev20241126-cp310-cp310-linux_x86_64.whl ; python_version == "3.10"
+torch_xla[tpu] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.6.0.dev20241126-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
 jaxlib==0.4.36.dev20241122
 jax==0.4.36.dev20241122
diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py
index 9a054eb8a4cf7..7bdb7f0e2d6a9 100644
--- a/vllm/worker/tpu_model_runner.py
+++ b/vllm/worker/tpu_model_runner.py
@@ -126,8 +126,10 @@ def __init__(
             logger.warning(
                 "The max_model_len (%d) is too large. This may degrade the "
                 "performance due to the insufficient smem size. Consider "
-                "setting --max-model-len to a smaller value.",
-                self.model_config.max_model_len)
+                "setting --max-model-len to a smaller value, like %d.",
+                self.model_config.max_model_len,
+                self.model_config.max_model_len * smem_size //
+                block_table_size)
 
     def load_model(self) -> None:
         self.device = self.device_config.device