From 069bbbf8a4c8c1ce971532d09ee55b5fe8e148c5 Mon Sep 17 00:00:00 2001 From: Sean Pryor Date: Tue, 3 Sep 2024 19:18:11 -0400 Subject: [PATCH] Update ROCm build for UBI --- Dockerfile.rocm.ubi | 291 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 Dockerfile.rocm.ubi diff --git a/Dockerfile.rocm.ubi b/Dockerfile.rocm.ubi new file mode 100644 index 0000000000000..850c1b9f67024 --- /dev/null +++ b/Dockerfile.rocm.ubi @@ -0,0 +1,291 @@ +## Global Args ################################################################# +ARG BASE_UBI_IMAGE_TAG=9.4 +ARG PYTHON_VERSION=3.11 +# Default ROCm ARCHes to build vLLM for. +ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" + +## Base Layer ################################################################## +FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as rocm-base + +# Max jobs for parallel build +ARG MAX_JOBS=12 + +ENV BUILD_TARGET='rocm' + +USER root + +ENV ROCM_VERSION=6.1.2 + +# Set up ROCm repository and install necessary packages + +RUN echo "[amdgpu]" > /etc/yum.repos.d/amdgpu.repo && \ +echo "name=amdgpu" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.4/main/x86_64/" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "[ROCm-${ROCM_VERSION}]" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "name=ROCm${ROCM_VERSION}" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \ +echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo + +RUN microdnf -y update && \ + microdnf -y install rocm hipcc git which && \ + microdnf clean all + +WORKDIR /workspace + +################################################################################################## + +FROM rocm-base as python-install +ARG PYTHON_VERSION + +ENV VIRTUAL_ENV=/opt/vllm +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs \ + python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ + python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && \ + $VIRTUAL_ENV/bin/pip install --no-cache -U pip wheel && \ + microdnf clean all + +################################################################################################## + +FROM python-install as python-rocm-base + +# install common dependencies +RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt,readonly \ + --mount=type=bind,source=requirements-rocm.txt,target=requirements-rocm.txt,readonly \ + pip install -r requirements-rocm.txt + +################################################################################################## + +FROM python-rocm-base as base + +# Set the application mount point +ARG APP_MOUNT=/vllm-workspace +WORKDIR ${APP_MOUNT} + +# Upgrade pip and remove unnecessary packages +RUN python3 -m pip install --upgrade --no-cache-dir pip && \ + microdnf -y remove sccache || true && \ + python3 -m pip uninstall -y sccache || true && \ + rm -f "$(which sccache)" && \ + microdnf clean all && \ + rm -rf /var/cache/yum /var/cache/dnf + +# Install torch == 2.5.0 on ROCm +RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \ + *"rocm-6.1"*) \ + python3 -m pip uninstall -y torch torchvision \ + && python3 -m pip install --no-cache-dir --pre \ + torch==2.5.0.dev20240726 \ + torchvision==0.20.0.dev20240726 \ + --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \ + *) ;; esac + +# Set environment variables +ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer +ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/libtorch/lib +ENV CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/libtorch/include:/libtorch/include/torch/csrc/api/include:/opt/rocm/include +ENV PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" +ENV CCACHE_DIR=/root/.cache/ccache + +################################################################################################## + +FROM base as build_base + +RUN python3 -m pip install --upgrade --no-cache-dir ninja cmake>=3.26 + +################################################################################################## + +################################################################################################## + +### AMD-SMI build stage +FROM build_base AS build_amdsmi + +# Build AMD SMI wheel +RUN cd /opt/rocm/share/amd_smi && \ + python3 -m pip wheel . --wheel-dir=/install + +################################################################################################## + +### Flash-Attention wheel build stage +FROM build_base AS build_fa + +# Whether to install CK-based flash-attention +ARG BUILD_FA="1" +ARG TRY_FA_WHEEL="1" +ARG FA_WHEEL_URL="https://github.com/ROCm/flash-attention/releases/download/v2.5.9post1-cktile-vllm/flash_attn-2.5.9.post1-cp39-cp39-linux_x86_64.whl" +ARG FA_GFX_ARCHS="gfx90a;gfx942" +ARG FA_BRANCH="23a2b1c2" + +# Ensure necessary tools are installed +RUN microdnf install -y wget git && microdnf clean all + +# Build ROCm flash-attention wheel if `BUILD_FA` is set to 1 +RUN --mount=type=cache,target=${CCACHE_DIR} \ + if [ "$BUILD_FA" = "1" ]; then \ + if [ "$TRY_FA_WHEEL" = "1" ] && python3 -m pip install "${FA_WHEEL_URL}"; then \ + # If a suitable wheel exists, download it instead of building FA + mkdir -p /install && wget -N "${FA_WHEEL_URL}" -P /install; \ + else \ + mkdir -p /libs && \ + cd /libs && \ + git clone https://github.com/ROCm/flash-attention.git && \ + cd flash-attention && \ + git checkout "${FA_BRANCH}" && \ + git submodule update --init && \ + GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \ + fi; \ + else \ + # Create an empty directory otherwise as later build stages expect one + mkdir -p /install; \ + fi + +################################################################################################## + +### Triton wheel build stage +FROM build_base AS build_triton + +# Whether to build triton on rocm +ARG BUILD_TRITON="1" +ARG TRITON_BRANCH="e0fc12c" + +# Build triton wheel if `BUILD_TRITON` is set to 1 +RUN --mount=type=cache,target=${CCACHE_DIR} \ + if [ "$BUILD_TRITON" = "1" ]; then \ + mkdir -p /libs && cd /libs && \ + git clone https://github.com/OpenAI/triton.git && \ + cd triton && \ + git checkout "${TRITON_BRANCH}" && \ + cd python && \ + python3 setup.py bdist_wheel --dist-dir=/install; \ + else \ + # Create an empty directory otherwise as later build stages expect one + mkdir -p /install; \ + fi + +################################################################################################## + +### Final vLLM build stage +FROM build_base AS final + +# Import the vLLM development directory from the build context +COPY . . + +# Install wget only if it is needed +RUN microdnf -y install wget && microdnf clean all + +# Package upgrades to avoid dependency issues and add functionality +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install --upgrade numba scipy huggingface-hub[cli] && \ + microdnf clean all + +ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" + +# Set environment variables for runtime +ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 +# Silences the HF Tokenizers warning +ENV TOKENIZERS_PARALLELISM=false + +# Install dependencies from requirements file and apply ROCm specific patches +RUN --mount=type=cache,target=${CCACHE_DIR} \ + --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install -Ur requirements-rocm.txt && \ + ROCM_VERSION=$(ls /opt | grep -Po 'rocm-[0-9]+\.[0-9]+') && \ + case "$ROCM_VERSION" in \ + "rocm-6.1") \ + # Apply patch for ROCm 6.1 + wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib && \ + # Remove potentially conflicting HIP runtime from torch + rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true ;; \ + *) \ + echo "ROCm version $ROCM_VERSION is not supported for patching." ;; \ + esac && \ + python3 setup.py clean --all && \ + python3 setup.py bdist_wheel --dist-dir=dist + +################################################################################################## + +FROM base AS vllm-openai + +WORKDIR /workspace + +# Set up the virtual environment and update PATH +ENV VIRTUAL_ENV=/opt/vllm +ENV PATH=$VIRTUAL_ENV/bin:$PATH + +# Install necessary build tools +RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc && \ + microdnf clean all + +# Copy amdsmi wheel into final image +RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \ + mkdir -p libs \ + && cp /install/*.whl libs \ + # Preemptively uninstall to avoid same-version no-installs + && python3 -m pip uninstall -y amdsmi; + +# Copy triton wheel(s) into final image if they were built +RUN --mount=type=bind,from=build_triton,src=/install,target=/install \ + mkdir -p libs \ + && if ls /install/*.whl; then \ + cp /install/*.whl libs \ + # Preemptively uninstall to avoid same-version no-installs + && python3 -m pip uninstall -y triton; fi + +# Copy flash-attn wheel(s) into final image if they were built +RUN --mount=type=bind,from=build_fa,src=/install,target=/install \ + mkdir -p libs \ + && if ls /install/*.whl; then \ + cp /install/*.whl libs \ + # Preemptively uninstall to avoid same-version no-installs + && python3 -m pip uninstall -y flash-attn; fi + +# Copy vLLM wheel(s) into the final image +RUN --mount=type=bind,from=final,src=/vllm-workspace/dist,target=/dist \ + --mount=type=cache,target=/root/.cache/pip \ + cp /dist/*.whl libs \ + # Preemptively uninstall to avoid same-version no-installs + && python3 -m pip uninstall -y vllm + +# Install wheels that were built to the final image +RUN --mount=type=cache,target=/root/.cache/pip \ + if ls libs/*.whl; then \ + python3 -m pip install libs/*.whl; fi + +# Environment variables for runtime configuration +ENV HF_HUB_OFFLINE=1 \ + PORT=8000 \ + HOME=/home/vllm \ + VLLM_USAGE_SOURCE=production-docker-image + +# Set up a non-root user for OpenShift +RUN umask 002 && \ + useradd --uid 2000 --gid 0 vllm && \ + mkdir -p /licenses && \ + chmod g+rwx $HOME /usr/src /workspace + +COPY LICENSE /licenses/vllm.md + +ENV HF_HUB_OFFLINE=1 \ + HOME=/home/vllm \ + # Allow requested max length to exceed what is extracted from the + # config.json + # see: https://github.com/vllm-project/vllm/pull/7080 + VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ + VLLM_USAGE_SOURCE=production-docker-image \ + VLLM_WORKER_MULTIPROC_METHOD=fork \ + VLLM_NO_USAGE_STATS=1 + +# Switch to the non-root user +USER 2000 + +# Set the entrypoint +ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]