From 62a1ddbf8ae483d485e14162ed802f40fa2dfd27 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:35:00 +0000 Subject: [PATCH] poetry actually can't handle the conflict between torch and nccl --- Dockerfile | 5 +++-- server/pyproject.toml | 3 --- server/requirements_cuda.txt | 38 +++++++++++++++++------------------- server/requirements_rocm.txt | 38 +++++++++++++++++------------------- 4 files changed, 39 insertions(+), 45 deletions(-) diff --git a/Dockerfile b/Dockerfile index fa9a45bfd5a..3f605393837 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,7 +39,7 @@ RUN cargo build --profile release-opt # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as pytorch-install -# NOTE: When updating PyTorch version, make sure to update server/pyproject.toml's `nvidia-nccl-cu12` requirement. Context: https://github.com/huggingface/text-generation-inference/pull/2099 +# NOTE: When updating PyTorch version, beware to remove `pip install nvidia-nccl-cu12==2.22.3` below in the Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099 ARG PYTORCH_VERSION=2.3.0 ARG PYTHON_VERSION=3.10 @@ -234,7 +234,8 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements_cuda.txt && \ - pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir + pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir && \ + pip install nvidia-nccl-cu12==2.22.3 # Deps before the binaries # The binaries change on every build given we burn the SHA into them diff --git a/server/pyproject.toml b/server/pyproject.toml index 010b349cf36..7b5e83fb78e 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -50,9 +50,6 @@ outlines = ["outlines"] grpcio-tools = "^1.51.1" pytest = "^7.3.0" -[tool.poetry.group.cuda.dependencies] -# Fix https://github.com/NVIDIA/nccl/issues/1251 bug in nccl 2.20.5 bundled with PyTorch 2.3, nccl 2.22.3 appears to fix the issue. -nvidia-nccl-cu12 = "==2.22.3" [[tool.poetry.source]] name = "pytorch-gpu-src" diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt index d192efff886..88fcc4f3633 100644 --- a/server/requirements_cuda.txt +++ b/server/requirements_cuda.txt @@ -1,32 +1,31 @@ -certifi==2024.6.2 ; python_version >= "3.9" and python_version < "3.13" +backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13" +certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13" charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13" click==8.1.7 ; python_version >= "3.9" and python_version < "3.13" colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13" einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13" filelock==3.14.0 ; python_version >= "3.9" and python_version < "3.13" -fsspec==2024.6.0 ; python_version >= "3.9" and python_version < "3.13" -googleapis-common-protos==1.63.1 ; python_version >= "3.9" and python_version < "3.13" +fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13" +googleapis-common-protos==1.63.0 ; python_version >= "3.9" and python_version < "3.13" grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13" grpcio-reflection==1.62.2 ; python_version >= "3.9" and python_version < "3.13" grpcio-status==1.62.2 ; python_version >= "3.9" and python_version < "3.13" -grpcio==1.64.1 ; python_version >= "3.9" and python_version < "3.13" +grpcio==1.64.0 ; python_version >= "3.9" and python_version < "3.13" hf-transfer==0.1.6 ; python_version >= "3.9" and python_version < "3.13" -huggingface-hub==0.23.2 ; python_version >= "3.9" and python_version < "3.13" +huggingface-hub==0.23.1 ; python_version >= "3.9" and python_version < "3.13" idna==3.7 ; python_version >= "3.9" and python_version < "3.13" -importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13" loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13" numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-api==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-common==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-grpc==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-http==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-instrumentation-grpc==0.46b0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-instrumentation==0.46b0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-proto==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-instrumentation-grpc==0.36b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13" packaging==24.0 ; python_version >= "3.9" and python_version < "3.13" pillow==10.3.0 ; python_version >= "3.9" and python_version < "3.13" prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13" @@ -34,17 +33,16 @@ protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13" py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13" regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13" -requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13" +requests==2.32.2 ; python_version >= "3.9" and python_version < "3.13" safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13" scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13" sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13" setuptools==70.0.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.41.2 ; python_version >= "3.9" and python_version < "3.13" +transformers==4.41.1 ; python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" -typing-extensions==4.12.1 ; python_version >= "3.9" and python_version < "3.13" +typing-extensions==4.12.0 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13" win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32" wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13" -zipp==3.19.1 ; python_version >= "3.9" and python_version < "3.13" diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt index d192efff886..88fcc4f3633 100644 --- a/server/requirements_rocm.txt +++ b/server/requirements_rocm.txt @@ -1,32 +1,31 @@ -certifi==2024.6.2 ; python_version >= "3.9" and python_version < "3.13" +backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13" +certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13" charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13" click==8.1.7 ; python_version >= "3.9" and python_version < "3.13" colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13" einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13" filelock==3.14.0 ; python_version >= "3.9" and python_version < "3.13" -fsspec==2024.6.0 ; python_version >= "3.9" and python_version < "3.13" -googleapis-common-protos==1.63.1 ; python_version >= "3.9" and python_version < "3.13" +fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13" +googleapis-common-protos==1.63.0 ; python_version >= "3.9" and python_version < "3.13" grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13" grpcio-reflection==1.62.2 ; python_version >= "3.9" and python_version < "3.13" grpcio-status==1.62.2 ; python_version >= "3.9" and python_version < "3.13" -grpcio==1.64.1 ; python_version >= "3.9" and python_version < "3.13" +grpcio==1.64.0 ; python_version >= "3.9" and python_version < "3.13" hf-transfer==0.1.6 ; python_version >= "3.9" and python_version < "3.13" -huggingface-hub==0.23.2 ; python_version >= "3.9" and python_version < "3.13" +huggingface-hub==0.23.1 ; python_version >= "3.9" and python_version < "3.13" idna==3.7 ; python_version >= "3.9" and python_version < "3.13" -importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13" loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13" numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-api==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-common==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-grpc==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp-proto-http==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-exporter-otlp==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-instrumentation-grpc==0.46b0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-instrumentation==0.46b0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-proto==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13" -opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-exporter-otlp==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-instrumentation-grpc==0.36b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13" +opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13" packaging==24.0 ; python_version >= "3.9" and python_version < "3.13" pillow==10.3.0 ; python_version >= "3.9" and python_version < "3.13" prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13" @@ -34,17 +33,16 @@ protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13" py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13" regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13" -requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13" +requests==2.32.2 ; python_version >= "3.9" and python_version < "3.13" safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13" scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13" sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13" setuptools==70.0.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.41.2 ; python_version >= "3.9" and python_version < "3.13" +transformers==4.41.1 ; python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" -typing-extensions==4.12.1 ; python_version >= "3.9" and python_version < "3.13" +typing-extensions==4.12.0 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13" win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32" wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13" -zipp==3.19.1 ; python_version >= "3.9" and python_version < "3.13"