From 6d73c1b14741da848c63cbf63076f23fc61e117e Mon Sep 17 00:00:00 2001
From: Jefferson Fialho
Date: Thu, 12 Dec 2024 09:31:40 -0300
Subject: [PATCH] install numactl to enable fastsafetensors

Signed-off-by: Jefferson Fialho
---
 Dockerfile.ubi | 68 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 21 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 8bb80a299ccc4..d8a4ad4465cb6 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -42,10 +42,37 @@ FROM python-install as cuda-base
 RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
     https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
 
+# Install necessary packages to build numactl from source
+RUN microdnf install -y autoconf automake libtool make rpm-build
+
+# Download the numactl source RPM
+RUN microdnf download --enablerepo=ubi-9-baseos-source --source numactl.src
+
+# Get NUMACTL_V
+RUN NUMACTL_V=$(ls /root/numactl-* | sed -r 's/^numactl-(.+)\.el9\.src\.rpm$/\1/') && \
+    echo "NUMACTL_V is set to $NUMACTL_V"
+
+# Set NUMACTL_V as an environment variable for future layers
+ENV NUMACTL_V=${NUMACTL_V}
+
+# Install the source RPM
+RUN rpm -i /root/numactl-${NUMACTL_V}.el9.src.rpm
+
+# Build numactl from source
+RUN rpmbuild -ba /root/rpmbuild/SPECS/numactl.spec
+
+# Install the built RPMs
+RUN rpm -i /root/rpmbuild/RPMS/x86_64/{numactl-libs-${NUMACTL_V}.el9.x86_64.rpm,numactl-${NUMACTL_V}.el9.x86_64.rpm,numactl-devel-${NUMACTL_V}.el9.x86_64.rpm}
+
 RUN microdnf install -y \
-        cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
+    cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
     microdnf clean all
 
+# Search for numa.h in common locations
+RUN echo "### Searching for numa.h in common directories ###" && \
+    find /usr /usr/local /opt /lib -name numa.h | tee /dev/stderr && \
+    echo "### END OF NUMA.H SEARCH ###"
+
 ENV CUDA_HOME="/usr/local/cuda" \
     PATH="${CUDA_HOME}/bin:${PATH}" \
     LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
@@ -201,23 +228,22 @@ WORKDIR /home/vllm
 
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
 
-
-FROM vllm-openai as vllm-grpc-adapter
-
-USER root
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
-    HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
-
-ENV GRPC_PORT=8033 \
-    PORT=8000 \
-    # As an optimization, vLLM disables logprobs when using spec decoding by
-    # default, but this would be unexpected to users of a hosted model that
-    # happens to have spec decoding
-    # see: https://github.com/vllm-project/vllm/pull/6485
-    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
-
-USER 2000
-ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
+# FROM vllm-openai as vllm-grpc-adapter
+#
+# USER root
+#
+# RUN --mount=type=cache,target=/root/.cache/pip \
+#     --mount=type=cache,target=/root/.cache/uv \
+#     --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
+#     HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
+#
+# ENV GRPC_PORT=8033 \
+#     PORT=8000 \
+#     # As an optimization, vLLM disables logprobs when using spec decoding by
+#     # default, but this would be unexpected to users of a hosted model that
+#     # happens to have spec decoding
+#     # see: https://github.com/vllm-project/vllm/pull/6485
+#     DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
+#
+# USER 2000
+# ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
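
Note on the version-detection steps in the first hunk: NUMACTL_V is a shell
variable that exists only inside the RUN layer that computes it, so the
following `ENV NUMACTL_V=${NUMACTL_V}` expands against the Dockerfile's own
ARG/ENV scope, where NUMACTL_V is undefined, and the later rpm paths resolve
to /root/numactl-.el9.src.rpm. Separately, `ls /root/numactl-*` prints full
paths, so the `^numactl-` anchor in the sed expression never matches and the
echoed "version" is the whole path. Below is a minimal sketch of one way to
carry the version across layers, assuming the numactl-<version>.el9.src.rpm
naming used above; the /root/numactl_version file is illustrative and not
part of this patch:

    # Persist the detected version to a file; basename strips /root/ so the
    # sed anchor can match the file name.
    RUN basename /root/numactl-*.el9.src.rpm | \
        sed -r 's/^numactl-(.+)\.el9\.src\.rpm$/\1/' > /root/numactl_version

    # Re-read the version in the layer that installs, builds, and installs
    # the RPMs.
    RUN NUMACTL_V=$(cat /root/numactl_version) && \
        rpm -i /root/numactl-${NUMACTL_V}.el9.src.rpm && \
        rpmbuild -ba /root/rpmbuild/SPECS/numactl.spec && \
        rpm -i /root/rpmbuild/RPMS/x86_64/numactl-libs-${NUMACTL_V}.el9.x86_64.rpm \
               /root/rpmbuild/RPMS/x86_64/numactl-${NUMACTL_V}.el9.x86_64.rpm \
               /root/rpmbuild/RPMS/x86_64/numactl-devel-${NUMACTL_V}.el9.x86_64.rpm

Collapsing the whole detect/install/build sequence into a single RUN would
also work, since it avoids depending on variables surviving across layers at
all; either way the numactl-devel headers end up installed, which is what the
numa.h search step above verifies.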