diff --git a/Dockerfile b/Dockerfile index 7094fdc50dd..653b48b1f83 100644 --- a/Dockerfile +++ b/Dockerfile @@ -202,7 +202,7 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements.txt && \ - pip install ".[bnb, accelerate, quantize]" --no-cache-dir + pip install ".[bnb, accelerate, quantize, peft]" --no-cache-dir # Install benchmarker COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark diff --git a/server/Makefile b/server/Makefile index 7ba4aed4075..5fc51a94dca 100644 --- a/server/Makefile +++ b/server/Makefile @@ -23,10 +23,10 @@ install-torch: install: gen-server install-torch pip install pip --upgrade pip install -r requirements.txt - pip install -e ".[bnb, accelerate]" + pip install -e ".[bnb, accelerate, torch, peft]" run-dev: SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded export-requirements: - poetry export -o requirements.txt -E bnb -E quantize --without-hashes + poetry export -o requirements.txt -E bnb --without-hashes diff --git a/server/poetry.lock b/server/poetry.lock index a67ca13ff5e..d07c2c85c2e 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -4,7 +4,7 @@ name = "accelerate" version = "0.20.3" description = "Accelerate" -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "accelerate-0.20.3-py3-none-any.whl", hash = "sha256:147183e7a2215f7bd45a7af3b986a963daa8a61fa58b0912b9473049e011ad15"}, @@ -837,7 +837,7 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, @@ -872,7 +872,7 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, @@ -941,7 +941,7 @@ files = [ name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" -optional = false +optional = true python-versions = "*" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, @@ -1069,7 +1069,7 @@ dill = ">=0.3.7" name = "networkx" version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, @@ -1132,7 +1132,7 @@ files = [ name = "nvidia-cublas-cu12" version = "12.1.3.1" description = "CUBLAS native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, @@ -1143,7 +1143,7 @@ files = [ name = "nvidia-cuda-cupti-cu12" version = "12.1.105" description = "CUDA profiling tools runtime libs." -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, @@ -1154,7 +1154,7 @@ files = [ name = "nvidia-cuda-nvrtc-cu12" version = "12.1.105" description = "NVRTC native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, @@ -1165,7 +1165,7 @@ files = [ name = "nvidia-cuda-runtime-cu12" version = "12.1.105" description = "CUDA Runtime native Libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, @@ -1176,7 +1176,7 @@ files = [ name = "nvidia-cudnn-cu12" version = "8.9.2.26" description = "cuDNN runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, @@ -1189,7 +1189,7 @@ nvidia-cublas-cu12 = "*" name = "nvidia-cufft-cu12" version = "11.0.2.54" description = "CUFFT native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, @@ -1200,7 +1200,7 @@ files = [ name = "nvidia-curand-cu12" version = "10.3.2.106" description = "CURAND native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, @@ -1211,7 +1211,7 @@ files = [ name = "nvidia-cusolver-cu12" version = "11.4.5.107" description = "CUDA solver native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, @@ -1227,7 +1227,7 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparse-cu12" version = "12.1.0.106" description = "CUSPARSE native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, @@ -1241,7 +1241,7 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-nccl-cu12" version = "2.18.1" description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"}, @@ -1251,7 +1251,7 @@ files = [ name = "nvidia-nvjitlink-cu12" version = "12.3.101" description = "Nvidia JIT LTO Library" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, @@ -1262,7 +1262,7 @@ files = [ name = "nvidia-nvtx-cu12" version = "12.1.105" description = "NVIDIA Tools Extension" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, @@ -1506,7 +1506,7 @@ xml = ["lxml (>=4.8.0)"] name = "peft" version = "0.4.0" description = "Parameter-Efficient Fine-Tuning (PEFT)" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "peft-0.4.0-py3-none-any.whl", hash = "sha256:2cf992772a6d703814477e0bdcdadd68cb8ea388111ce2d793dd2ff0e438f357"}, @@ -1635,7 +1635,7 @@ files = [ name = "psutil" version = "5.9.6" description = "Cross-platform lib for process and system monitoring in Python." -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"}, @@ -2148,7 +2148,7 @@ files = [ name = "sympy" version = "1.12" description = "Computer algebra system (CAS) in Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, @@ -2238,7 +2238,7 @@ files = [ name = "torch" version = "2.1.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "torch-2.1.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:5ebc43f5355a9b7be813392b3fb0133991f0380f6f0fcc8218d5468dc45d1071"}, @@ -2380,7 +2380,7 @@ vision = ["Pillow (<10.0.0)"] name = "triton" version = "2.1.0" description = "A language and compiler for custom Deep Learning operations" -optional = false +optional = true python-versions = "*" files = [ {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"}, @@ -2775,9 +2775,11 @@ multidict = ">=4.0" [extras] accelerate = ["accelerate"] bnb = ["bitsandbytes"] +peft = ["peft"] quantize = ["accelerate", "datasets", "texttable"] +torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "d1718673df4034f060973e608a0a8e0b9319bf3d20b656a1337cd048875dccce" +content-hash = "354dffb296d5b93ff2df541498cf03176ca0c18639d79fdd7339cd33caecde50" diff --git a/server/pyproject.toml b/server/pyproject.toml index 9f8c8b5466b..32d6e7dd53f 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -30,14 +30,16 @@ transformers = "^4.32.1" einops = "^0.6.1" texttable = { version = "^1.6.7", optional = true } datasets = { version = "^2.14.0", optional = true } -peft = "^0.4.0" -torch = { version = "^2.1.0" } +peft = { version = "^0.4.0", optional = true } +torch = { version = "^2.1.0", optional = true } scipy = "^1.11.1" pillow = "^10.0.0" [tool.poetry.extras] +torch = ["torch"] accelerate = ["accelerate"] bnb = ["bitsandbytes"] +peft = ["peft"] quantize = ["texttable", "datasets", "accelerate"] [tool.poetry.group.dev.dependencies] diff --git a/server/requirements.txt b/server/requirements.txt index c56e48596fa..bc1b8891216 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,22 +1,13 @@ -accelerate==0.20.3 ; python_version >= "3.9" and python_version < "3.13" -aiohttp==3.9.0 ; python_version >= "3.9" and python_version < "3.13" -aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "3.13" -async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11" -attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13" backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13" bitsandbytes==0.41.2.post2 ; python_version >= "3.9" and python_version < "3.13" certifi==2023.11.17 ; python_version >= "3.9" and python_version < "3.13" charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13" click==8.1.7 ; python_version >= "3.9" and python_version < "3.13" colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") -datasets==2.14.7 ; python_version >= "3.9" and python_version < "3.13" deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13" -dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13" einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13" filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13" -frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13" fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13" -fsspec[http]==2023.10.0 ; python_version >= "3.9" and python_version < "3.13" googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13" grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13" grpcio-reflection==1.59.3 ; python_version >= "3.9" and python_version < "3.13" @@ -25,26 +16,8 @@ grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13" hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13" huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13" idna==3.4 ; python_version >= "3.9" and python_version < "3.13" -jinja2==3.1.2 ; python_version >= "3.9" and python_version < "3.13" loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13" -markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "3.13" -mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13" -multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13" -multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13" -networkx==3.2.1 ; python_version >= "3.9" and python_version < "3.13" numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13" -nvidia-cublas-cu12==12.1.3.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cuda-cupti-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cuda-runtime-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cudnn-cu12==8.9.2.26 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cufft-cu12==11.0.2.54 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-curand-cu12==10.3.2.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cusolver-cu12==11.4.5.107 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-cusparse-cu12==12.1.0.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-nccl-cu12==2.18.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-nvjitlink-cu12==12.3.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" -nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13" opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13" opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13" @@ -55,15 +28,8 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13" opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13" packaging==23.2 ; python_version >= "3.9" and python_version < "3.13" -pandas==2.1.3 ; python_version >= "3.9" and python_version < "3.13" -peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13" pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13" protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13" -psutil==5.9.6 ; python_version >= "3.9" and python_version < "3.13" -pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "3.13" -pyarrow==14.0.1 ; python_version >= "3.9" and python_version < "3.13" -python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13" -pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "3.13" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13" regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13" requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13" @@ -71,19 +37,11 @@ safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13" scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13" sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13" setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13" -six==1.16.0 ; python_version >= "3.9" and python_version < "3.13" -sympy==1.12 ; python_version >= "3.9" and python_version < "3.13" -texttable==1.7.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13" -torch==2.1.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13" transformers==4.33.3 ; python_version >= "3.9" and python_version < "3.13" -triton==2.1.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13" -tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13" win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32" wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13" -xxhash==3.4.1 ; python_version >= "3.9" and python_version < "3.13" -yarl==1.9.3 ; python_version >= "3.9" and python_version < "3.13"