From 1a38e5d786f0c22a6a543f85814a09669aea2daf Mon Sep 17 00:00:00 2001
From: Maksym Sobolyev
Date: Wed, 17 Jul 2024 16:08:16 -0700
Subject: [PATCH] Add simple pipeline to install dependencies and build a docker image for Nvidia and Intel HW.

---
 .github/workflows/main.yml     | 83 ++++++++++++++++++++++++++++++++++
 docker/Dockerfile              | 35 ++++++++++++++
 docker/install_conda.sh        | 13 ++++++
 docker/install_hw.sh           | 38 ++++++++++++++++
 docker/install_requirements.sh | 26 +++++++++++
 docker/intel-ray.diff          | 17 +++++++
 docker/setup_conda.sh          | 14 ++++++
 7 files changed, 226 insertions(+)
 create mode 100644 .github/workflows/main.yml
 create mode 100644 docker/Dockerfile
 create mode 100755 docker/install_conda.sh
 create mode 100755 docker/install_hw.sh
 create mode 100755 docker/install_requirements.sh
 create mode 100644 docker/intel-ray.diff
 create mode 100755 docker/setup_conda.sh

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..5b8a870
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,83 @@
+# Builds the Infernos Docker image per inference HW (matrix) and pushes it to Docker Hub.
+
+name: Build & Publish
+
+# Controls when the action will run.
+on:
+  # Triggers the workflow on all push or pull request events
+  push:
+  pull_request:
+
+  release:
+    types: [created]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+  schedule:
+    - cron: "0 0 * * *"
+
+# added using https://github.com/step-security/secure-repo
+permissions:
+  contents: read
+
+# NOTE(review): the job `if:` skips release/schedule/workflow_dispatch runs - confirm intended.
+jobs:
+  Docker:
+    name: Build&Push to DockerHub
+    if: (github.event_name == 'push' || github.event_name == 'pull_request')
+    runs-on: [self-hosted, linux, x64]
+    strategy:
+      matrix:
+        infer-hw: ['nvidia', 'intel']
+    env:
+      DOCKER_REPO: 'sippylabs/infernos'
+      BASE_IMAGE: 'ubuntu:24.10'
+      PYTHON_VER: '3.11'
+      CONDA_MAINENV: 'Infernos'
+      INFER_HW: ${{ matrix.infer-hw }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          submodules: 'recursive'
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_REPO }}
+          tags: |
+            type=schedule,prefix=${{ env.INFER_HW }}-
+            type=ref,event=branch,prefix=${{ env.INFER_HW }}-
+            type=ref,event=tag,prefix=${{ env.INFER_HW }}-
+            type=ref,event=pr,prefix=${{ env.INFER_HW }}-
+            type=raw,value=${{ env.INFER_HW }}-latest,enable={{is_default_branch}}
+            type=sha,prefix=${{ env.INFER_HW }}-sha-
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./docker/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}  # login is skipped on PRs
+          build-args: |
+            BASE_IMAGE=${{ env.BASE_IMAGE }}
+            PYTHON_VER=${{ env.PYTHON_VER }}
+            CONDA_MAINENV=${{ env.CONDA_MAINENV }}
+            INFER_HW=${{ env.INFER_HW }}
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha,scope=${{ env.INFER_HW }}
+          cache-to: type=gha,mode=max,scope=${{ env.INFER_HW }}
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..a03836e
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,35 @@
+# syntax=docker/dockerfile:1.7-labs
+
+ARG BASE_IMAGE=ubuntu:24.10
+FROM $BASE_IMAGE AS build
+LABEL maintainer="Maksym Sobolyev "
+
+USER root
+
+# Build & install everything; the APT_* ARGs below are read by the docker/*.sh helper scripts
+WORKDIR /tmp
+ENV DEBIAN_FRONTEND=noninteractive
+ARG APT_UPDATE="apt-get update"
+RUN ${APT_UPDATE}
+ARG APT_UPGRADE="apt-get upgrade -y"
+RUN ${APT_UPGRADE}
+ARG APT_INSTALL="apt-get install --no-install-recommends -y"
+ARG APT_CLEAN="apt-get clean"
+RUN ${APT_INSTALL} lsb-release ca-certificates && ${APT_CLEAN}
+COPY docker/install_conda.sh .
+RUN ./install_conda.sh
+COPY docker/setup_conda.sh .
+ARG PYTHON_VER
+ARG CONDA_MAINENV
+RUN ./setup_conda.sh
+COPY docker/install_hw.sh .
+ARG INFER_HW
+ENV CONDA_ACTIVATE="eval . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_MAINENV}"
+ENV PYTHON_CMD="python${PYTHON_VER}"
+RUN ./install_hw.sh
+COPY docker/install_requirements.sh docker/intel-ray.diff requirements.txt .
+ENV CONDA_MAINENV="${CONDA_MAINENV}"
+RUN ./install_requirements.sh
+
+COPY --exclude=.git --exclude=.github --link . /Infernos
+WORKDIR /Infernos
diff --git a/docker/install_conda.sh b/docker/install_conda.sh
new file mode 100755
index 0000000..9dd8519
--- /dev/null
+++ b/docker/install_conda.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Register the Anaconda apt repository (key + sources list) and install the conda package.
+set -e
+set -x
+
+${APT_INSTALL} curl gpg
+curl -fsSL https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > /usr/share/keyrings/conda-archive-keyring.gpg
+
+echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list
+
+${APT_UPDATE}
+${APT_INSTALL} conda
+${APT_CLEAN}
diff --git a/docker/install_hw.sh b/docker/install_hw.sh
new file mode 100755
index 0000000..79727e4
--- /dev/null
+++ b/docker/install_hw.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Install the packages specific to the selected INFER_HW (nvidia needs nothing extra here).
+set -e
+set -x
+
+PIP_INSTALL="${PYTHON_CMD} -m pip install"
+# CONDA_ACTIVATE comes from the Dockerfile: sources conda.sh and activates ${CONDA_MAINENV}
+${CONDA_ACTIVATE}
+
+case "${INFER_HW}" in
+nvidia)
+  ;;
+intel)
+  curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
+    gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg
+  echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
+    tee /etc/apt/sources.list.d/oneAPI.list
+  ${APT_UPDATE}
+  ${APT_INSTALL} libze1 ocl-icd-libopencl1
+  ${APT_INSTALL} intel-oneapi-dpcpp-cpp-2024.1=2024.1.0-963 intel-oneapi-mkl-devel=2024.1.0-691
+  ${PIP_INSTALL} torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 \
+    intel-extension-for-pytorch==2.1.30.post0 oneccl_bind_pt==2.1.300+xpu \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+  find "/opt/conda/envs/${CONDA_MAINENV}/lib" -name libstdc++.so.6 -delete
+  printf "/opt/intel/oneapi/mkl/2024.1/lib\n/opt/intel/oneapi/compiler/2024.1/lib\n" > \
+    /etc/ld.so.conf.d/zzz-intel-oneapi.conf
+  ldconfig
+  ;;
+*)
+  echo "Unknown INFER_HW: '${INFER_HW}'" >&2
+  false
+  ;;
+esac
+
+apt-get autoremove -y
+${APT_CLEAN}
+rm -rf ~/.cache
+mkdir ~/.cache
diff --git a/docker/install_requirements.sh b/docker/install_requirements.sh
new file mode 100755
index 0000000..0db1c00
--- /dev/null
+++ b/docker/install_requirements.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Install requirements.txt into the conda env; the build tools are removed again afterwards.
+set -e
+set -x
+
+DEV_PKGS="gcc g++ libc6-dev cmake pkg-config make git patch"
+PIP_INSTALL="${PYTHON_CMD} -m pip install"
+
+${APT_INSTALL} ${DEV_PKGS}
+${CONDA_ACTIVATE}
+
+${PIP_INSTALL} -r requirements.txt
+# Patch site-packages now: patch(1) is part of DEV_PKGS and is removed just below.
+if [ "${INFER_HW}" = "intel" ]
+then
+  patch -d "/opt/conda/envs/${CONDA_MAINENV}/lib/python${PYTHON_VER}/site-packages" \
+    -p2 -s < intel-ray.diff
+  apt-mark manual intel-oneapi-dpcpp-cpp-2024.1 intel-oneapi-mkl-devel
+fi
+
+apt-get remove -y ${DEV_PKGS}
+
+apt-get autoremove -y
+${APT_CLEAN}
+rm -rf ~/.cache
+mkdir ~/.cache
diff --git a/docker/intel-ray.diff b/docker/intel-ray.diff
new file mode 100644
index 0000000..eef9f10
--- /dev/null
+++ b/docker/intel-ray.diff
@@ -0,0 +1,17 @@
+commit 85baaa1c10a957c747f54ec0705e6b7cbfa972d1
+Author: Maksym Sobolyev
+Date:   Tue Mar 12 22:59:59 2024 -0700
+
+    Hack on ipex.
+
+diff --git a/python/ray/_private/workers/default_worker.py b/python/ray/_private/workers/default_worker.py
+index 4c2109831c..62115940d0 100644
+--- a/python/ray/_private/workers/default_worker.py
++++ b/python/ray/_private/workers/default_worker.py
+@@ -1,3 +1,6 @@
++try: import intel_extension_for_pytorch as ipex
++except ModuleNotFoundError: ipex = None
++
+ import os
+ import argparse
+ import base64
diff --git a/docker/setup_conda.sh b/docker/setup_conda.sh
new file mode 100755
index 0000000..597283f
--- /dev/null
+++ b/docker/setup_conda.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+# Create the main conda env with the requested Python and add its lib dir to ld.so.conf.d.
+set -e
+set -x
+
+. /opt/conda/etc/profile.d/conda.sh
+conda create -y --name "${CONDA_MAINENV}" python=${PYTHON_VER}
+conda activate "${CONDA_MAINENV}"
+conda install -y pip
+echo "/opt/conda/envs/${CONDA_MAINENV}/lib" > "/etc/ld.so.conf.d/zzz-conda-${CONDA_MAINENV}.conf"
+ldconfig
+rm -r /opt/conda/pkgs
+rm -rf ~/.cache
+mkdir ~/.cache