Skip to content

Commit

Permalink
Add simple pipeline to install dependencies and build a docker
Browse files Browse the repository at this point in the history
image for Nvidia and Intel HW.
  • Loading branch information
sobomax committed Jul 19, 2024
1 parent 0898cf6 commit 1a38e5d
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 0 deletions.
83 changes: 83 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Builds the Infernos Docker image once per supported inference hardware
# target (see the job matrix) and publishes the result to Docker Hub.

name: Build & Publish

# Controls when the action will run.
on:
  # Triggers the workflow on all push or pull request events
  push:
  pull_request:

  release:
    types: [created]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

  schedule:
    - cron: "0 0 * * *"

# added using https://github.com/step-security/secure-repo
permissions:
  contents: read

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  Docker:
    name: Build&Push to DockerHub
    # NOTE: release, workflow_dispatch and schedule events still start the
    # workflow, but this (only) job is skipped for them by the condition below.
    if: (github.event_name == 'push' || github.event_name == 'pull_request')
    runs-on: [self-hosted, linux, x64]
    strategy:
      matrix:
        infer-hw: ['nvidia', 'intel']
    env:
      DOCKER_REPO: 'sippylabs/infernos'
      BASE_IMAGE: 'ubuntu:24.10'
      PYTHON_VER: '3.11'
      CONDA_MAINENV: 'Infernos'
      INFER_HW: ${{ matrix.infer-hw }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Pull requests build without credentials, so the login is skipped.
      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Every tag carries the hardware prefix: both matrix jobs push to the
      # same repository, so an unprefixed tag (e.g. the default "sha-<commit>")
      # would be overwritten by whichever job finishes last.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.DOCKER_REPO }}
          tags: |
            type=schedule,prefix=${{ env.INFER_HW }}-
            type=ref,event=branch,prefix=${{ env.INFER_HW }}-
            type=ref,event=tag,prefix=${{ env.INFER_HW }}-
            type=ref,event=pr,prefix=${{ env.INFER_HW }}-
            type=raw,value=${{ env.INFER_HW }}-latest,enable={{is_default_branch}}
            type=sha,prefix=${{ env.INFER_HW }}-sha-

      - name: Build Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile
          # Only push when we are logged in; pull requests just verify the build.
          push: ${{ github.event_name != 'pull_request' }}
          build-args: |
            BASE_IMAGE=${{ env.BASE_IMAGE }}
            PYTHON_VER=${{ env.PYTHON_VER }}
            CONDA_MAINENV=${{ env.CONDA_MAINENV }}
            INFER_HW=${{ env.INFER_HW }}
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
35 changes: 35 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# syntax=docker/dockerfile:1.7-labs

# Image with a conda environment, the hardware-specific runtime selected by
# INFER_HW, the Python requirements, and the source tree under /Infernos.
# The "labs" syntax above is required for COPY --exclude used at the bottom.

ARG BASE_IMAGE=ubuntu:24.10
FROM $BASE_IMAGE AS build
LABEL maintainer="Maksym Sobolyev <[email protected]>"

USER root

# Build & install everything
WORKDIR /tmp
# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive
# The APT_* build args are visible as environment variables to RUN steps,
# which is how the helper scripts below pick up the same apt command lines.
ARG APT_UPDATE="apt-get update"
RUN ${APT_UPDATE}
ARG APT_UPGRADE="apt-get upgrade -y"
RUN ${APT_UPGRADE}
ARG APT_INSTALL="apt-get install --no-install-recommends -y"
ARG APT_CLEAN="apt-get clean"
RUN ${APT_INSTALL} lsb-release ca-certificates && ${APT_CLEAN}
COPY docker/install_conda.sh .
RUN ./install_conda.sh
COPY docker/setup_conda.sh .
# No defaults: PYTHON_VER and CONDA_MAINENV must be passed with --build-arg.
ARG PYTHON_VER
ARG CONDA_MAINENV
RUN ./setup_conda.sh
COPY docker/install_hw.sh .
# Hardware target; install_hw.sh accepts "nvidia" or "intel".
ARG INFER_HW
# Helper scripts expand ${CONDA_ACTIVATE} unquoted to source conda's profile
# script and activate the main environment in the current shell.
ENV CONDA_ACTIVATE="eval . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_MAINENV}"
ENV PYTHON_CMD="python${PYTHON_VER}"
RUN ./install_hw.sh
# Multiple sources: the destination is spelled as a directory ("./").
COPY docker/install_requirements.sh docker/intel-ray.diff requirements.txt ./
# Persist the environment name in the final image (ARG values are build-only).
ENV CONDA_MAINENV="${CONDA_MAINENV}"
RUN ./install_requirements.sh

COPY --exclude=.git --exclude=.github --link . /Infernos
WORKDIR /Infernos
13 changes: 13 additions & 0 deletions docker/install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
#
# Registers Anaconda's apt repository and installs the "conda" package.
# Expects APT_INSTALL, APT_UPDATE and APT_CLEAN in the environment (the
# Dockerfile provides them as build args).

set -e
set -x

${APT_INSTALL} curl gpg

# Download the signing key to a temporary file first. In a "curl | gpg"
# pipeline plain sh only reports gpg's exit status, so a failed download
# would go unnoticed by "set -e"; -f additionally turns HTTP errors into a
# non-zero exit instead of saving the error page.
KEY_TMP="$(mktemp)"
curl -fsS -o "${KEY_TMP}" https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc
gpg --dearmor < "${KEY_TMP}" > /usr/share/keyrings/conda-archive-keyring.gpg
rm -f "${KEY_TMP}"

echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list

${APT_UPDATE}
${APT_INSTALL} conda
${APT_CLEAN}
38 changes: 38 additions & 0 deletions docker/install_hw.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/sh
#
# Installs the hardware-specific runtime selected by INFER_HW into the image.
# Expects INFER_HW, PYTHON_CMD, CONDA_ACTIVATE, CONDA_MAINENV and the APT_*
# command variables in the environment.

set -e
set -x

PIP_INSTALL="${PYTHON_CMD} -m pip install"

# Expanded unquoted on purpose: the value is an "eval ..." command line.
${CONDA_ACTIVATE}

case "${INFER_HW}" in
nvidia)
    # Nothing hardware-specific is installed at this stage.
    ;;
intel)
    # Fetch the oneAPI repository key into a temporary file so that a failed
    # download aborts the script; with "curl | gpg" plain sh would only see
    # gpg's exit status.
    KEY_TMP="$(mktemp)"
    curl -fsS -o "${KEY_TMP}" https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
    gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg < "${KEY_TMP}"
    rm -f "${KEY_TMP}"
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
      tee /etc/apt/sources.list.d/oneAPI.list
    ${APT_UPDATE}
    ${APT_INSTALL} libze1 ocl-icd-libopencl1
    # Versions are pinned; the 2024.1 paths written to ld.so.conf.d below
    # must be kept in sync with them.
    ${APT_INSTALL} intel-oneapi-dpcpp-cpp-2024.1=2024.1.0-963 intel-oneapi-mkl-devel=2024.1.0-691
    ${PIP_INSTALL} torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 \
      intel-extension-for-pytorch==2.1.30.post0 oneccl_bind_pt==2.1.300+xpu \
      --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
    # Drop the conda environment's own libstdc++.so.6.
    # NOTE(review): presumably so the system copy is the one that gets loaded - confirm.
    find "/opt/conda/envs/${CONDA_MAINENV}/lib" -name libstdc++.so.6 -delete
    # Make the oneAPI MKL and compiler libraries visible to the dynamic linker.
    printf "/opt/intel/oneapi/mkl/2024.1/lib\n/opt/intel/oneapi/compiler/2024.1/lib\n" > \
      /etc/ld.so.conf.d/zzz-intel-oneapi.conf
    ldconfig
    ;;
*)
    echo "Unknown INFER_HW: '${INFER_HW}'" >&2
    exit 1
    ;;
esac

apt-get autoremove -y
${APT_CLEAN}
# Leave an empty cache directory behind; -f/-p keep this step from aborting
# the build when the directory is absent.
rm -rf ~/.cache
mkdir -p ~/.cache
26 changes: 26 additions & 0 deletions docker/install_requirements.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/sh
#
# Installs the Python requirements into the conda environment, using a
# temporary build toolchain that is removed again afterwards.
# Expects PYTHON_CMD, PYTHON_VER, CONDA_ACTIVATE, CONDA_MAINENV, INFER_HW and
# the APT_* command variables in the environment.

set -e
set -x

DEV_PKGS="gcc g++ libc6-dev cmake pkg-config make git patch"
PIP_INSTALL="${PYTHON_CMD} -m pip install"

${APT_INSTALL} ${DEV_PKGS}
# Expanded unquoted on purpose: the value is an "eval ..." command line.
${CONDA_ACTIVATE}

${PIP_INSTALL} -r requirements.txt

# This must run before DEV_PKGS is removed: "patch" is one of those packages,
# so applying the diff after the removal fails with "patch: not found".
if [ "${INFER_HW}" = "intel" ]
then
    patch -d "/opt/conda/envs/${CONDA_MAINENV}/lib/python${PYTHON_VER}/site-packages" \
      -p2 -s < intel-ray.diff
    # Mark the oneAPI packages as manually installed so the autoremove below
    # keeps them.
    apt-mark manual intel-oneapi-dpcpp-cpp-2024.1 intel-oneapi-mkl-devel
fi

apt-get remove -y ${DEV_PKGS}

apt-get autoremove -y
${APT_CLEAN}
# Leave an empty cache directory behind; -f/-p keep this step from aborting
# the build when the directory is absent.
rm -rf ~/.cache
mkdir -p ~/.cache
17 changes: 17 additions & 0 deletions docker/intel-ray.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
commit 85baaa1c10a957c747f54ec0705e6b7cbfa972d1
Author: Maksym Sobolyev <[email protected]>
Date: Tue Mar 12 22:59:59 2024 -0700

Hack on ipex.

diff --git a/python/ray/_private/workers/default_worker.py b/python/ray/_private/workers/default_worker.py
index 4c2109831c..62115940d0 100644
--- a/python/ray/_private/workers/default_worker.py
+++ b/python/ray/_private/workers/default_worker.py
@@ -1,3 +1,6 @@
+try: import intel_extension_for_pytorch as ipex
+except ModuleNotFoundError: ipex = None
+
import os
import argparse
import base64
14 changes: 14 additions & 0 deletions docker/setup_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
#
# Creates the main conda environment with the requested Python version and
# registers the environment's lib directory with the dynamic linker.
# Expects CONDA_MAINENV and PYTHON_VER in the environment.

set -e
set -x

# Both values come from Dockerfile build args that have no default; fail
# early with a clear message instead of creating a mis-named environment.
: "${CONDA_MAINENV:?CONDA_MAINENV must be set}"
: "${PYTHON_VER:?PYTHON_VER must be set}"

. /opt/conda/etc/profile.d/conda.sh
conda create -y --name "${CONDA_MAINENV}" "python=${PYTHON_VER}"
conda activate "${CONDA_MAINENV}"
conda install -y pip
echo "/opt/conda/envs/${CONDA_MAINENV}/lib" > "/etc/ld.so.conf.d/zzz-conda-${CONDA_MAINENV}.conf"
ldconfig
# Drop conda's package directory and the user cache to keep the layer small;
# -f/-p keep these steps from aborting the build when a path is absent.
rm -rf /opt/conda/pkgs
rm -rf ~/.cache
mkdir -p ~/.cache

0 comments on commit 1a38e5d

Please sign in to comment.