forked from kserve/kserve
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>
- Loading branch information
1 parent
d19e310
commit 51c777d
Showing
5 changed files
with
240 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Builds the Hugging Face vLLM CPU image to verify that the Dockerfile still
# builds. The only active job below is a build with `push: false`.
name: Huggingface Vllm CPU Docker Publisher

on:
  push:
    # `master` corresponds to the Docker `latest` image.
    branches: [master]
    # Tags shaped like `v1.2.3` correspond to releases.
    tags: ["v*"]
  # Every pull request gets a verification build.
  pull_request:

env:
  IMAGE_NAME: huggingfaceserver-vllm-cpu

# One run per workflow/ref pair; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Verification job: the "test" is that the image builds successfully.
  # See also https://docs.docker.com/docker-hub/builds/automated-testing/
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout source
        uses: actions/checkout@v4

      # Repository-local composite action; must run after checkout.
      - name: Free-up disk space
        uses: ./.github/actions/free-up-disk-space

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Prints the runner's CPU details into the job log.
      - name: CPU Info
        run: cat /proc/cpuinfo

      - name: Run tests
        uses: docker/build-push-action@v5
        with:
          context: python
          file: python/huggingface_vllm_cpu.Dockerfile
          platforms: linux/amd64
          push: false
          # https://github.com/docker/buildx/issues/1533
          provenance: false
|
||
# Push image to Docker Hub (this job is currently commented out). | ||
# See also https://docs.docker.com/docker-hub/builds/ | ||
# push: | ||
# # Ensure test job passes before pushing image. | ||
## needs: test | ||
# | ||
# runs-on: ubuntu-latest | ||
## if: github.event_name == 'push' | ||
# | ||
# steps: | ||
# - name: Checkout source | ||
# uses: actions/checkout@v4 | ||
# | ||
# - name: Free-up disk space | ||
# uses: ./.github/actions/free-up-disk-space | ||
# | ||
# - name: Setup Docker Buildx | ||
# uses: docker/setup-buildx-action@v3 | ||
# | ||
# - name: Login to DockerHub | ||
# uses: docker/login-action@v3 | ||
# with: | ||
# username: ${{ secrets.DOCKER_USER }} | ||
# password: ${{ secrets.DOCKER_PASSWORD }} | ||
# | ||
# - name: Export version variable | ||
# run: | | ||
# IMAGE_ID=sivanantha/$IMAGE_NAME | ||
# | ||
# # Change all uppercase to lowercase | ||
# IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') | ||
# | ||
# # Strip git ref prefix from version | ||
# VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') | ||
# | ||
# # Strip "v" prefix from tag name | ||
# # [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') | ||
# | ||
# # Use Docker `latest` tag convention | ||
# [ "$VERSION" == "master" ] && VERSION=latest | ||
# | ||
# echo VERSION=$VERSION >> $GITHUB_ENV | ||
# echo IMAGE_ID=$IMAGE_ID >> $GITHUB_ENV | ||
# | ||
# - name: Build and push | ||
# uses: docker/build-push-action@v5 | ||
# with: | ||
# platforms: linux/amd64 | ||
# context: python | ||
# file: python/huggingface_vllm_cpu_server.Dockerfile | ||
# push: true | ||
# tags: ${{ env.IMAGE_ID }}:${{ env.VERSION }} | ||
# # https://github.com/docker/buildx/issues/1533 | ||
# provenance: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Global build arguments. Declared before the first FROM, they are usable in
# FROM lines; a stage must redeclare one (as VENV_PATH is below) to read it.
ARG PYTHON_VERSION=3.10
ARG BASE_IMAGE=python:${PYTHON_VERSION}-slim-bookworm
ARG VENV_PATH=/prod_venv

FROM ${BASE_IMAGE} AS builder

# Where Poetry gets its own virtual environment, and which release to install.
ARG POETRY_HOME=/opt/poetry
ARG POETRY_VERSION=1.7.1

# vLLM release tag whose source archive is downloaded and built in this stage.
ARG VLLM_VERSION=0.5.0.post1

# Compiler toolchain plus the tools needed to fetch and unpack the vLLM source.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        g++-12 \
        gcc-12 \
        git \
        tar \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Register gcc-12 / g++-12 as the `gcc` / `g++` commands.
RUN update-alternatives \
        --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-12

# Download the tagged vLLM source archive and unpack it in the working directory.
RUN wget https://github.com/vllm-project/vllm/archive/refs/tags/v${VLLM_VERSION}.tar.gz -O vllm.tar.gz \
    && tar -xzvp -f vllm.tar.gz

# Poetry lives in a dedicated venv; its bin directory is appended to PATH.
RUN python3 -m venv ${POETRY_HOME} \
    && ${POETRY_HOME}/bin/pip3 install poetry==${POETRY_VERSION}
ENV PATH="$PATH:${POETRY_HOME}/bin"

# Create the production venv and put it first on PATH so that the `python` and
# `pip` used by the following steps are the ones inside it.
ARG VENV_PATH
ENV VIRTUAL_ENV=${VENV_PATH}
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# kserve: copy only the manifests first so the dependency layer is reused when
# just the sources change, then copy the sources and install the package itself.
COPY kserve/pyproject.toml kserve/poetry.lock kserve/
RUN --mount=type=cache,target=/root/.cache \
    cd kserve && poetry install --no-root --no-interaction
COPY kserve kserve
RUN --mount=type=cache,target=/root/.cache \
    cd kserve && poetry install --no-interaction

# huggingfaceserver: same manifests-first, sources-second sequence.
COPY huggingfaceserver/pyproject.toml huggingfaceserver/poetry.lock huggingfaceserver/
RUN --mount=type=cache,target=/root/.cache \
    cd huggingfaceserver && poetry install --no-root --no-interaction
COPY huggingfaceserver huggingfaceserver
RUN --mount=type=cache,target=/root/.cache \
    cd huggingfaceserver && poetry install --no-interaction
|
||
# Install vLLM's build prerequisites and its CPU requirements.
# The setuptools specifier must be quoted: unquoted, the shell parses
# `>=49.4.0` as an output redirection, so pip would install an unconstrained
# setuptools and write its output to a file named `=49.4.0`.
RUN --mount=type=cache,target=/root/.cache cd vllm-${VLLM_VERSION} && pip install wheel packaging ninja "setuptools>=49.4.0" && \
    pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
# Performance boost for PyTorch on Intel CPUs.
# NOTE: this wheel is tagged cp310 / linux_x86_64, so it only matches a
# Python 3.10 interpreter on amd64; update the URL if PYTHON_VERSION changes.
RUN --mount=type=cache,target=/root/.cache pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.3.100%2Bgit0eb3473-cp310-cp310-linux_x86_64.whl
# Build and install vLLM from the unpacked source for the CPU target.
RUN cd vllm-${VLLM_VERSION} && VLLM_TARGET_DEVICE=cpu python setup.py install
|
||
|
||
# Runtime stage: starts from the same base image and receives only the built
# virtual environment and the package sources from the builder stage.
FROM ${BASE_IMAGE} AS prod

COPY third_party third_party

# tcmalloc, for high performance memory allocation and better cache locality.
RUN apt-get update -y && apt-get install libtcmalloc-minimal4 -y --no-install-recommends && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Put the builder's virtual environment first on PATH.
ARG VENV_PATH
ENV VIRTUAL_ENV=${VENV_PATH}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Unprivileged runtime user with a fixed UID.
RUN useradd kserve -m -u 1000 -d /home/kserve

COPY --from=builder --chown=kserve:kserve $VIRTUAL_ENV $VIRTUAL_ENV
COPY --from=builder kserve kserve
COPY --from=builder huggingfaceserver huggingfaceserver

# Set a writable Hugging Face home folder to avoid permission issue. See https://github.com/kserve/kserve/issues/3562
ENV HF_HOME="/tmp/huggingface"
# https://huggingface.co/docs/safetensors/en/speed#gpu-benchmark
ENV SAFETENSORS_FAST_GPU="1"
# https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhubdisabletelemetry
ENV HF_HUB_DISABLE_TELEMETRY="1"
# Preload tcmalloc through the image environment so the server process actually
# uses it. Exporting LD_PRELOAD from root's ~/.bashrc never took effect: the
# container runs as UID 1000 and the exec-form ENTRYPOINT starts python3
# directly, so no shell ever reads that file.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4"

USER 1000
ENTRYPOINT ["python3", "-m", "huggingfaceserver"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters