From 6d73c1b14741da848c63cbf63076f23fc61e117e Mon Sep 17 00:00:00 2001
From: Jefferson Fialho
Date: Thu, 12 Dec 2024 09:31:40 -0300
Subject: [PATCH] install numactl to enable fastsafetensors

Signed-off-by: Jefferson Fialho
---
 Dockerfile.ubi | 68 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 21 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 8bb80a299ccc4..d8a4ad4465cb6 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -42,10 +42,37 @@ FROM python-install as cuda-base
 RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
     https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
 
+# Install necessary packages to build numactl from source
+RUN microdnf install -y autoconf automake libtool make rpm-build
+
+# Download the numactl source RPM
+RUN microdnf download --enablerepo=ubi-9-baseos-source --source numactl.src
+
+# Get NUMACTL_V
+RUN NUMACTL_V=$(ls /root/numactl-* | sed -r 's/^numactl-(.+)\.el9\.src\.rpm$/\1/') && \
+    echo "NUMACTL_V is set to $NUMACTL_V"
+
+# Set NUMACTL_V as an environment variable for future layers
+ENV NUMACTL_V=${NUMACTL_V}
+
+# Install the source RPM
+RUN rpm -i /root/numactl-${NUMACTL_V}.el9.src.rpm
+
+# Build numactl from source
+RUN rpmbuild -ba /root/rpmbuild/SPECS/numactl.spec
+
+# Install the built RPMs
+RUN rpm -i /root/rpmbuild/RPMS/x86_64/{numactl-libs-${NUMACTL_V}.el9.x86_64.rpm,numactl-${NUMACTL_V}.el9.x86_64.rpm,numactl-devel-${NUMACTL_V}.el9.x86_64.rpm}
+
 RUN microdnf install -y \
-        cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
+    cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
     microdnf clean all
 
+# Search for numa.h in common locations
+RUN echo "### Searching for numa.h in common directories ###" && \
+    find /usr /usr/local /opt /lib -name numa.h | tee /dev/stderr && \
+    echo "### END OF NUMA.H SEARCH ###"
+
 ENV CUDA_HOME="/usr/local/cuda" \
     PATH="${CUDA_HOME}/bin:${PATH}" \
     LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
@@ -201,23 +228,22 @@ WORKDIR /home/vllm
 
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
 
-
-FROM vllm-openai as vllm-grpc-adapter
-
-USER root
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
-    HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
-
-ENV GRPC_PORT=8033 \
-    PORT=8000 \
-    # As an optimization, vLLM disables logprobs when using spec decoding by
-    # default, but this would be unexpected to users of a hosted model that
-    # happens to have spec decoding
-    # see: https://github.com/vllm-project/vllm/pull/6485
-    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
-
-USER 2000
-ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
+# FROM vllm-openai as vllm-grpc-adapter
+#
+# USER root
+#
+# RUN --mount=type=cache,target=/root/.cache/pip \
+#     --mount=type=cache,target=/root/.cache/uv \
+#     --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
+#     HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
+#
+# ENV GRPC_PORT=8033 \
+#     PORT=8000 \
+#     # As an optimization, vLLM disables logprobs when using spec decoding by
+#     # default, but this would be unexpected to users of a hosted model that
+#     # happens to have spec decoding
+#     # see: https://github.com/vllm-project/vllm/pull/6485
+#     DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
+#
+# USER 2000
+# ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
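
Note on the version-detection steps in the first hunk: NUMACTL_V is a shell
variable that exists only inside the RUN layer that computes it, so the
following `ENV NUMACTL_V=${NUMACTL_V}` expands against the Dockerfile's own
ARG/ENV scope, where NUMACTL_V is undefined, and the later rpm paths resolve
to /root/numactl-.el9.src.rpm. Separately, `ls /root/numactl-*` prints full
paths, so the `^numactl-` anchor in the sed expression never matches and the
echoed "version" is the whole path. Below is a minimal sketch of one way to
carry the version across layers, assuming the numactl-<version>.el9.src.rpm
naming used above; the /root/numactl_version file is illustrative and not
part of this patch:

    # Persist the detected version to a file; basename strips /root/ so the
    # sed anchor can match the file name.
    RUN basename /root/numactl-*.el9.src.rpm | \
        sed -r 's/^numactl-(.+)\.el9\.src\.rpm$/\1/' > /root/numactl_version

    # Re-read the version in the layer that installs, builds, and installs
    # the RPMs.
    RUN NUMACTL_V=$(cat /root/numactl_version) && \
        rpm -i /root/numactl-${NUMACTL_V}.el9.src.rpm && \
        rpmbuild -ba /root/rpmbuild/SPECS/numactl.spec && \
        rpm -i /root/rpmbuild/RPMS/x86_64/numactl-libs-${NUMACTL_V}.el9.x86_64.rpm \
               /root/rpmbuild/RPMS/x86_64/numactl-${NUMACTL_V}.el9.x86_64.rpm \
               /root/rpmbuild/RPMS/x86_64/numactl-devel-${NUMACTL_V}.el9.x86_64.rpm

Collapsing the whole detect/install/build sequence into a single RUN would
also work, since it avoids depending on variables surviving across layers at
all; either way the numactl-devel headers end up installed, which is what the
numa.h search step above verifies.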