Skip to content

Commit

Permalink
Init
Browse files Browse the repository at this point in the history
  • Loading branch information
davidspek committed Aug 31, 2021
0 parents commit dab5faa
Show file tree
Hide file tree
Showing 40 changed files with 1,192 additions and 0 deletions.
154 changes: 154 additions & 0 deletions .github/workflows/build-all.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
name: Build Notebook Images

# Runs for pushes to master, for pushed version tags, and for pull requests
# that target master.
on:
  push:
    branches: [ master ]
    tags:
      - 'v*.*.*'
  pull_request:
    branches:
      - 'master'

jobs:
  # Determines which Dockerfiles are affected by the triggering change and
  # exposes them as a JSON build matrix for the `build-images` job.
  build-matrix:
    name: Create Build Matrix
    runs-on: ubuntu-latest
    outputs:
      # JSON object of the form {"include": [{path, project, dockerfile, version}, ...]}
      matrix: ${{ steps.generate-matrix.outputs.matrix }}
      # "yes" when at least one Dockerfile was found, otherwise "no"
      continue: ${{ steps.generate-matrix.outputs.continue }}
    steps:
      - name: Checkout
        id: set-matrix
        uses: actions/checkout@v2
        with:
          fetch-depth: 0  # No shallow clone, we need all history

      # NOTE(review): the action reference below looks mangled (possibly an
      # e-mail obfuscation artifact) - restore the real `owner/repo@version`.
      - name: Bump version and push tag
        if: github.event_name != 'pull_request'
        id: tag_version
        uses: mathieudutour/[email protected]
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: Create a GitHub release
        if: github.event_name != 'pull_request'
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag_version.outputs.new_tag }}
          release_name: Release ${{ steps.tag_version.outputs.new_tag }}
          body: ${{ steps.tag_version.outputs.changelog }}

      - name: generate matrix
        id: generate-matrix
        env:
          # Empty when `tag_version` produced no tag, e.g. on pull requests
          # where that step is skipped.
          RELEASE: ${{ steps.tag_version.outputs.new_tag }}
        # Earlier one-line implementation, kept for reference:
        # run: echo "::set-output name=matrix::{\"include\":[$(for changed_folder in $(dirname $(git diff --name-only ${{ github.event.before }}..${{ github.event.after }}) | sort -u); do find $changed_folder -name "*Dockerfile"; done | sed 's/^\|$/"/g'|paste -sd, -)]}"
        run: |
          # 1. Collect the directories of the files touched by the relevant commit.
          if [ -z "${RELEASE}" ];
          then
            RELEASE="pr"
            CHANGED_DIRS=$(git diff-tree --no-commit-id --name-only -r ${{ github.event.pull_request.head.sha }} | xargs -I {} dirname {})
          else
            CHANGED_DIRS=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} | xargs -I {} dirname {})
          fi
          # Fall back to the before..after range of the event when the commit
          # itself reported no changed files.
          if [ -z "${CHANGED_DIRS}" ];
          then
            CHANGED_DIRS=$(dirname $(git diff --name-only ${{ github.event.before }}..${{ github.event.after }}))
            if [ "${CHANGED_DIRS}" == "." ];
            then
              CHANGED_DIRS=
            fi
          fi
          echo "${CHANGED_DIRS}"

          # 2. Find every file whose name ends in "Dockerfile" below those directories.
          DOCKERFILES=$(for CHANGED_DIR in ${CHANGED_DIRS}; do find ${CHANGED_DIR} -name "*Dockerfile"; done | sort -u)
          echo "${DOCKERFILES}"

          # 3. Build the JSON fragments; separators are inserted after the loop.
          MATRIX_PROJECTS_JSON="["
          MATRIX_INCLUDE_JSON="["
          for DOCKERFILE in ${DOCKERFILES}; do
            DIR=$(dirname ${DOCKERFILE})
            # Dockerfiles with "cuda" in their file name become a separate "<dir>-cuda" project.
            if [[ "$(basename ${DOCKERFILE})" == *"cuda"* ]]
            then
              MATRIX_PROJECTS_JSON+=$(sed 's/^/"/;s/$/"/' <<< "${DIR}"-cuda)
              PROJECT="${DIR}"-cuda
            else
              MATRIX_PROJECTS_JSON+=$(sed 's/^/"/;s/$/"/' <<< "${DIR}")
              PROJECT="${DIR}"
            fi
            echo "${MATRIX_PROJECTS_JSON}"
            MATRIX_INCLUDE_JSON+="{\"path\": \"${DIR}\", \"project\": \"${PROJECT}\", \"dockerfile\": \"${DOCKERFILE}\", \"version\": \"${RELEASE}\"}"
            echo "${MATRIX_INCLUDE_JSON}"
          done
          echo "${MATRIX_PROJECTS_JSON}"
          echo "${MATRIX_INCLUDE_JSON}"

          # Turn the concatenated entries ("}{" and '""') into comma-separated lists.
          MATRIX_INCLUDE_JSON="${MATRIX_INCLUDE_JSON//\}\{/\}, \{}"
          MATRIX_INCLUDE_JSON+="]"
          MATRIX_PROJECTS_JSON="${MATRIX_PROJECTS_JSON//\"\"/\", \"}"
          MATRIX_PROJECTS_JSON+="]"
          MATRIX_JSON="{\"include\": ${MATRIX_INCLUDE_JSON}}"
          echo "${MATRIX_JSON}"

          # 4. Only let the build job run when there is something to build.
          CONTINUE_DOCKER_JOB="no"
          if [[ "${MATRIX_PROJECTS_JSON}" != "[]" ]]; then
            CONTINUE_DOCKER_JOB="yes"
          fi
          echo "${CONTINUE_DOCKER_JOB}"

          # Step outputs are written to $GITHUB_OUTPUT; the `::set-output`
          # workflow command is deprecated.
          echo "continue=${CONTINUE_DOCKER_JOB}" >> "${GITHUB_OUTPUT}"
          echo "matrix=${MATRIX_JSON}" >> "${GITHUB_OUTPUT}"

  # Builds one image per matrix entry; images are pushed for every event
  # except pull requests.
  build-images:
    if: needs.build-matrix.outputs.continue == 'yes'
    name: Build and push notebook images
    runs-on: ubuntu-latest
    needs: build-matrix
    strategy:
      matrix: ${{ fromJson(needs.build-matrix.outputs.matrix) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      # NOTE(review): the action reference below looks mangled (possibly an
      # e-mail obfuscation artifact) - restore the real `owner/repo@version`.
      # No later step in this job reads this step's outputs.
      - name: Get latest tag
        id: latest_tag
        uses: DavidSpek/[email protected]
        with:
          img: 'ghcr.io/pluralsh/kubeflow-notebooks-${{ matrix.project }}'

      - name: Docker meta
        id: meta
        uses: crazy-max/ghaction-docker-meta@v3
        with:
          # list of Docker images to use as base name for tags
          images: |
            ghcr.io/pluralsh/kubeflow-notebooks-${{ matrix.project }}
          # generate Docker tags based on the following events/attributes;
          # the semver tag is derived from the release version computed by the
          # build-matrix job (the project name is never a valid semver value)
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{raw}},value=${{ matrix.version }}
            type=sha

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to GHCR
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.MY_GITHUB_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: ${{ matrix.path }}
          file: ${{ matrix.dockerfile }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
83 changes: 83 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Example Notebook Servers

> 🛑️️ These server images are provided as __examples only__ and are supported on a best-effort basis.
> Contributions are greatly appreciated.
## Overview

In this folder, we have tried to make an extendable image structure which you can easily augment with additional tools and packages.

![flow-chart of kubeflow notebook server images](image-flow-chart.png)

__The following images are considered 'base' images, which you can extend:__

Name | Description
--- | ---
[./base](./base) | the common base for all other images
[./jupyter](./jupyter) | the base [JupyterLab](https://github.com/jupyterlab/jupyterlab) image
[./codeserver](./codeserver) | the base [code-server](https://github.com/cdr/code-server) (Visual Studio Code) image
[./rstudio](./rstudio) | the base [RStudio](https://github.com/rstudio/rstudio) image

__Important points about the images:__

- they make use of the [s6-overlay](https://github.com/just-containers/s6-overlay) init system
- they all run as the non-root `jovyan` user

## How do I extend these images?

> ⚠️ Any changes that users make __after spawning__ a Kubeflow notebook only last for the lifetime of the pod, unless they are stored in a PVC-backed directory.
### Adding conda/pip packages

Extend one of the base images and install any `pip` or `conda` packages your Kubeflow Notebook users are likely to need.

As a guide, look at [jupyter-pytorch-full.cpu](./jupyter-pytorch-full/cpu.Dockerfile) for a `pip install ...` example, and the [rstudio-tidyverse](./rstudio-tidyverse/Dockerfile) for `conda install ...`.

__WARNING:__ a common cause of errors is users running `pip install --user ...`, causing the home directory (which is backed by a PVC) to contain a different or incompatible version of a package contained in `/opt/conda/...`.

### Adding apt-get packages

Extend one of the base images and install any `apt-get` packages your Kubeflow Notebook users are likely to need.

__WARNING:__ ensure you swap to `root` in the Dockerfile before running `apt-get`, and swap back to `jovyan` after.

### Adding container startup scripts

Some use-cases might require custom scripts to run during the startup of the Notebook Server container, or advanced users might want to add additional services that run inside the container (for example, an Apache or NGINX web server).
To make this easy, we use the [s6-overlay](https://github.com/just-containers/s6-overlay).

The [s6-overlay](https://github.com/just-containers/s6-overlay) differs from other init systems, such as the popular [tini](https://github.com/krallin/tini).
While `tini` was created to handle a single process running in a container as PID 1, the `s6-overlay` is built to manage multiple processes and allows the creator of the image to determine which process failures should silently restart, and which should cause the container to exit.

__Custom startup scripts:__

Scripts that need to run during the startup of the container can be placed in `/etc/cont-init.d/`, and are executed in ascending alphanumeric order.

An example of a startup script can be found in [./rstudio/s6/cont-init.d/02-rstudio-env-fix](./rstudio/s6/cont-init.d/02-rstudio-env-fix).
This script uses the [with-contenv](https://github.com/just-containers/s6-overlay#container-environment) helper so that environment variables (passed to the container) are available in the script.
The purpose of this script is to snapshot any `KUBERNETES_*` environment variables into the `Renviron.site` at pod startup, as without these variables `kubectl` does not work.

__Custom service scripts:__

Extra services to be monitored by `s6-overlay` should be placed in their own folder under `/etc/services.d/` containing a script called `run` and optionally a finishing script `finish`.

An example of a service can be found in [jupyter/s6/services.d/jupyterlab](jupyter/s6/services.d/jupyterlab) which is used to start JupyterLab itself.
For more information about the `run` and `finish` scripts, please see the [s6-overlay documentation](https://github.com/just-containers/s6-overlay#writing-a-service-script).

__WARNING:__ our example images run `s6-overlay` as `$NB_USER`, not `root`, meaning any files or scripts related to `s6-overlay` should be owned by `$NB_USER`.

There may be cases when you need to run a service as root.
To do this, you can change the Dockerfile to have `USER root` at the end, and then use `s6-setuidgid` to run the user-facing services as `$NB_USER`.

For example, here is a `run` script for `code-server`:

```bash
#!/usr/bin/with-contenv bash

export SHELL='/bin/bash'
exec s6-setuidgid $NB_USER \
/usr/local/bin/code-server \
--bind-addr 0.0.0.0:8888 \
--disable-telemetry \
--auth none
```
76 changes: 76 additions & 0 deletions base/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Common base image: Ubuntu 20.04 with basic tooling, the s6-overlay init
# system, kubectl, and a non-root notebook user.
FROM ubuntu:20.04

# common environment variables
ENV NB_USER=jovyan
ENV NB_UID=1000
ENV NB_PREFIX=/
# separate instruction: $NB_USER must already be defined when HOME is expanded
ENV HOME=/home/$NB_USER
ENV SHELL=/bin/bash

# args - software versions
ARG KUBECTL_ARCH="amd64"
ARG KUBECTL_VERSION=v1.21.0
ARG S6_ARCH="amd64"
# renovate: datasource=github-tags depName=just-containers/s6-overlay versioning=loose
ARG S6_VERSION=v2.2.0.3

# set shell to bash
SHELL ["/bin/bash", "-c"]

# install - useful linux packages
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get -yq update \
 && apt-get -yq upgrade \
 && apt-get -yq install --no-install-recommends \
    apt-transport-https \
    bash \
    bzip2 \
    ca-certificates \
    curl \
    git \
    gnupg \
    gnupg2 \
    locales \
    lsb-release \
    nano \
    software-properties-common \
    tzdata \
    unzip \
    vim \
    wget \
    zip \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# install - s6 overlay
# The installer is only executed after its GPG signature has been verified.
# `curl -f` makes an HTTP error fail the build at the download step.
RUN export GNUPGHOME=/tmp/ \
 && curl -fsSL "https://github.com/just-containers/s6-overlay/releases/download/${S6_VERSION}/s6-overlay-${S6_ARCH}-installer" -o /tmp/s6-overlay-${S6_VERSION}-installer \
 && curl -fsSL "https://github.com/just-containers/s6-overlay/releases/download/${S6_VERSION}/s6-overlay-${S6_ARCH}-installer.sig" -o /tmp/s6-overlay-${S6_VERSION}-installer.sig \
 && gpg --keyserver keys.gnupg.net --keyserver pgp.surfnet.nl --recv-keys 6101B2783B2FD161 \
 && gpg -q --verify /tmp/s6-overlay-${S6_VERSION}-installer.sig /tmp/s6-overlay-${S6_VERSION}-installer \
 && chmod +x /tmp/s6-overlay-${S6_VERSION}-installer \
 && /tmp/s6-overlay-${S6_VERSION}-installer / \
 && rm /tmp/s6-overlay-${S6_VERSION}-installer.sig /tmp/s6-overlay-${S6_VERSION}-installer

# install - kubectl
# The binary is checked against its published SHA-256 before being made
# executable ("<hash>  <file>" with two spaces is the canonical checksum line).
RUN curl -fsSL "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/${KUBECTL_ARCH}/kubectl" -o /usr/local/bin/kubectl \
 && curl -fsSL "https://dl.k8s.io/${KUBECTL_VERSION}/bin/linux/${KUBECTL_ARCH}/kubectl.sha256" -o /tmp/kubectl.sha256 \
 && echo "$(cat /tmp/kubectl.sha256)  /usr/local/bin/kubectl" | sha256sum --check \
 && rm /tmp/kubectl.sha256 \
 && chmod +x /usr/local/bin/kubectl

# create user and set required ownership
# (-M: no home directory is created by useradd, it is created explicitly below;
#  -N: no group named after the user is created)
RUN useradd -M -s /bin/bash -N -u ${NB_UID} ${NB_USER} \
 && mkdir -p ${HOME} \
 && chown -R ${NB_USER}:users ${HOME} \
 && chown -R ${NB_USER}:users /usr/local/bin \
 && chown -R ${NB_USER}:users /etc/s6

# set locale configs
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \
 && locale-gen
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

# run as the non-root notebook user from here on
USER $NB_UID
70 changes: 70 additions & 0 deletions codeserver-python/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# code-server image extended with a conda-managed Python, the packages from
# requirements.txt, and a set of code-server extensions.
FROM kubeflownotebooks/codeserver:v0.3.53

USER root

# args - software versions
ARG CODESERVER_PYTHON_VERSION=2021.5.842923320
ARG CODESERVER_GITLENS_VERSION=11.4.1
ARG CODESERVER_GIT_GRAPH_VERSION=1.30.0
ARG MINIFORGE_ARCH="x86_64"
# renovate: datasource=github-tags depName=conda-forge/miniforge versioning=loose
ARG MINIFORGE_VERSION=4.10.3-5
ARG PIP_VERSION=21.1.2
ARG PYTHON_VERSION=3.8.10

# update - ensure apt packages are always updated
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get -yq update \
 && apt-get -yq upgrade \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# setup environment for conda
ENV CONDA_DIR=/opt/conda
ENV PATH="${CONDA_DIR}/bin:${PATH}"
RUN mkdir -p ${CONDA_DIR} \
 && chown -R ${NB_USER}:users ${CONDA_DIR}

USER $NB_UID

# install - conda, pip, python
# The Miniforge installer is checked against its published SHA-256 before it is
# run. `${MINIFORGE_VERSION:0:-2}` drops the last two characters of the version
# (the "-5" suffix of the default value) to obtain the conda version to pin.
# `curl -f` makes an HTTP error fail the build at the download step.
RUN curl -fsSL "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-${MINIFORGE_VERSION}-Linux-${MINIFORGE_ARCH}.sh" -o /tmp/Miniforge3.sh \
 && curl -fsSL "https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-${MINIFORGE_VERSION}-Linux-${MINIFORGE_ARCH}.sh.sha256" -o /tmp/Miniforge3.sh.sha256 \
 && echo "$(awk '{ print $1; }' /tmp/Miniforge3.sh.sha256)  /tmp/Miniforge3.sh" | sha256sum --check \
 && rm /tmp/Miniforge3.sh.sha256 \
 && /bin/bash /tmp/Miniforge3.sh -b -f -p ${CONDA_DIR} \
 && rm /tmp/Miniforge3.sh \
 && conda config --system --set auto_update_conda false \
 && conda config --system --set show_channel_urls true \
 && echo "conda ${MINIFORGE_VERSION:0:-2}" >> ${CONDA_DIR}/conda-meta/pinned \
 && echo "python ${PYTHON_VERSION}" >> ${CONDA_DIR}/conda-meta/pinned \
 && conda install -y -q \
    python=${PYTHON_VERSION} \
    conda=${MINIFORGE_VERSION:0:-2} \
    pip=${PIP_VERSION} \
 && conda update -y -q --all \
 && conda clean -a -f -y \
 && chown -R ${NB_USER}:users ${CONDA_DIR} \
 && chown -R ${NB_USER}:users ${HOME}

# install - requirements.txt
COPY --chown=jovyan:users requirements.txt /tmp
RUN python3 -m pip install -r /tmp/requirements.txt --quiet --no-cache-dir \
 && rm -f /tmp/requirements.txt \
 && chown -R ${NB_USER}:users ${CONDA_DIR} \
 && chown -R ${NB_USER}:users ${HOME}

# install - codeserver extensions
RUN code-server --install-extension "ms-python.python@${CODESERVER_PYTHON_VERSION}" \
 && code-server --install-extension "eamodio.gitlens@${CODESERVER_GITLENS_VERSION}" \
 && code-server --install-extension "mhutchie.git-graph@${CODESERVER_GIT_GRAPH_VERSION}"

# s6 - copy scripts
COPY --chown=jovyan:users s6/ /etc

# s6 - 01-copy-tmp-home
# Keeps a copy of the home directory under /tmp_home, owned by the notebook
# user. NOTE(review): presumably consumed by an s6 script named
# 01-copy-tmp-home, which is not visible here - confirm.
USER root
RUN mkdir -p /tmp_home \
 && cp -r ${HOME} /tmp_home \
 && chown -R ${NB_USER}:users /tmp_home
USER ${NB_UID}
3 changes: 3 additions & 0 deletions codeserver-python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Python packages installed with `pip install -r` by codeserver-python/Dockerfile.
# Every entry is pinned to an exact version.
kfp==1.7.2
kfp-server-api==1.7.0
kfserving==0.6.0
3 changes: 3 additions & 0 deletions codeserver-python/s6/cont-init.d/02-conda-init
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/with-contenv bash
# Container start-up script (cont-init.d): runs `conda init bash` and then
# `conda activate base`. The with-contenv shebang makes the container's
# environment variables available to this script.
# NOTE(review): `conda activate` usually needs conda's shell hook loaded in the
# current shell; confirm the second command succeeds in this non-interactive
# script and that its effect is actually needed.
conda init bash
conda activate base
Loading

0 comments on commit dab5faa

Please sign in to comment.