feat: Add UBI docker image

jim60105 · Nov 29, 2023 · 1df4aaa · 1df4aaa
1 parent 7f2b6ce
commit 1df4aaa
Show file tree

Hide file tree

Showing 5 changed files with 187 additions and 1 deletion.
diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml
@@ -14,6 +14,34 @@ on:
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
+  # Build and push the UBI-based image that ships without a preloaded model.
+  # This job does not export any build cache.
+  docker-ubi-no_model:
+    # Runner image the job executes on
+    runs-on: ubuntu-latest
+    # Ordered list of tasks executed by the job
+    steps:
+      # Fetch the repository together with its git submodules
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+
+      # Shared local setup action; its `tags` and `labels` outputs feed the build step
+      - name: Setup docker
+        id: setup
+        uses: ./.github/workflows/docker-reused-setup-steps
+        with:
+          tag: ubi-no_model
+          token: ${{ secrets.CR_PAT }}
+
+      # Multi-arch build of Dockerfile.ubi-no_model, pushed to the registry
+      - name: Build and push:ubi-no_model
+        uses: docker/build-push-action@v4
+        with:
+          file: ./Dockerfile.ubi-no_model
+          context: .
+          platforms: linux/amd64, linux/arm64
+          tags: ${{ steps.setup.outputs.tags }}
+          labels: ${{ steps.setup.outputs.labels }}
+          push: true
+
   # Run the no_model build first ensure that the code at least builds
   docker-no_model:
     # The type of runner that the job will run on
@@ -97,7 +125,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      max-parallel: 10
+      max-parallel: 6
       matrix:
         lang:
           - fr

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
@@ -0,0 +1,80 @@
+# syntax=docker/dockerfile:1
+# Build arguments declared before the first FROM. Stages that need a value
+# re-declare it with a bare `ARG <name>` to pick up the default given here.
+# Model name handed to faster_whisper.WhisperModel when preloading
+ARG WHISPER_MODEL=base
+# Language handed to load_align_model.py and to `whisperx --language`
+ARG LANG=en
+# Cache locations exported as TORCH_HOME / HF_HOME in the final image
+ARG TORCH_HOME=/cache/torch
+ARG HF_HOME=/cache/huggingface
+
+FROM registry.access.redhat.com/ubi9/ubi-minimal AS python
+
+# Python 3.11 interpreter, development headers and pip
+RUN microdnf -y install \
+        python3.11 \
+        python3.11-devel \
+        python3.11-pip \
+    && microdnf clean all
+
+# Make the 3.11 binaries reachable under the generic python3 / pip names
+RUN ln -s /usr/bin/python3.11 /usr/bin/python3 \
+    && ln -s /usr/bin/pip3.11 /usr/bin/pip
+
+# Create the virtual environment and put it first on PATH
+RUN python3 -m venv /venv
+ENV PATH="/venv/bin:$PATH"
+
+# Runtime libraries that are missing on arm64 only
+# https://github.com/jim60105/docker-whisperX/issues/14
+ARG TARGETPLATFORM
+RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        microdnf -y install libgomp libsndfile \
+        && microdnf clean all; \
+    fi
+
+FROM python AS build
+
+# Tooling that is only required while building
+RUN microdnf -y install git \
+    && microdnf clean all
+
+# torch / torchaudio, resolved with the cu118 PyTorch wheel index as an extra
+# source; pip's download cache lives in a BuildKit cache mount, not in the layer
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+
+# Install whisperX from the local ./whisperX directory into the venv
+COPY ./whisperX /code
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install /code
+
+
+FROM build AS load_model
+
+# Re-declare the cache locations so they are set in the environment of the
+# RUN steps below (presumably where the libraries store their downloads)
+ARG TORCH_HOME
+ARG HF_HOME
+
+# Warm the cache: VAD model
+RUN python3 -c 'from whisperx.vad import load_vad_model; load_vad_model("cpu");'
+
+# Warm the cache: faster-whisper model selected by WHISPER_MODEL
+ARG WHISPER_MODEL
+RUN python3 -c 'import faster_whisper; model = faster_whisper.WhisperModel("'${WHISPER_MODEL}'")'
+
+# Warm the cache: alignment model for the language selected by LANG
+ARG LANG
+COPY load_align_model.py .
+RUN python3 load_align_model.py ${LANG}
+
+
+FROM python AS final
+
+# Run as an unprivileged, fixed numeric UID
+USER 1001
+
+# Static ffmpeg / ffprobe binaries
+COPY --link --from=mwader/static-ffmpeg:6.0 /ffmpeg /usr/local/bin/
+COPY --link --from=mwader/static-ffmpeg:6.0 /ffprobe /usr/local/bin/
+
+# Virtual environment with torch and whisperX installed
+COPY --link --from=build /venv /venv
+
+# Preloaded models, owned by the runtime user
+COPY --chown=1001 --from=load_model /cache /cache
+
+WORKDIR /app
+
+# Point the libraries at the preloaded cache at run time
+ARG TORCH_HOME
+ARG HF_HOME
+ENV TORCH_HOME=${TORCH_HOME}
+ENV HF_HOME=${HF_HOME}
+
+# Bake the model / language chosen at build time into the runtime environment.
+# NOTE(review): LANG is also the POSIX locale variable; a value such as `en`
+# may affect locale handling inside the container - confirm this is intended.
+ARG WHISPER_MODEL
+ENV WHISPER_MODEL=${WHISPER_MODEL}
+ARG LANG
+ENV LANG=${LANG}
+
+STOPSIGNAL SIGINT
+
+# The script runs through `/bin/sh -c`, exactly as the former shell form did.
+# Arguments given to `docker run` are appended after the script, so the first
+# one becomes $0 and the rest become "$@" - hence the leading `--` in usage.
+# Quoting "$@" keeps arguments that contain spaces or glob characters intact,
+# and `exec` replaces the shell so whisperx receives STOPSIGNAL directly.
+ENTRYPOINT ["/bin/sh", "-c", "exec whisperx --model \"${WHISPER_MODEL}\" --language \"${LANG}\" \"$@\""]
diff --git a/Dockerfile.ubi-no_model b/Dockerfile.ubi-no_model
@@ -0,0 +1,52 @@
+# syntax=docker/dockerfile:1
+# Cache locations, declared before the first FROM. The final stage re-declares
+# them and exports them as TORCH_HOME / HF_HOME in the image environment.
+ARG TORCH_HOME=/cache/torch
+ARG HF_HOME=/cache/huggingface
+
+FROM registry.access.redhat.com/ubi9/ubi-minimal AS python
+
+# Python 3.11 toolchain, plus libgomp and libsndfile which are otherwise
+# missing dependencies on arm64
+RUN microdnf -y install \
+        python3.11 \
+        python3.11-devel \
+        python3.11-pip \
+        libgomp \
+        libsndfile \
+    && microdnf clean all
+
+# Make the 3.11 binaries reachable under the generic python3 / pip names
+RUN ln -s /usr/bin/python3.11 /usr/bin/python3 \
+    && ln -s /usr/bin/pip3.11 /usr/bin/pip
+
+# Create the virtual environment and put it first on PATH
+RUN python3 -m venv /venv
+ENV PATH="/venv/bin:$PATH"
+
+FROM python AS build
+
+# Tooling that is only required while building
+RUN microdnf -y install git \
+    && microdnf clean all
+
+# torch / torchaudio, resolved with the cu118 PyTorch wheel index as an extra
+# source; pip's download cache lives in a BuildKit cache mount, not in the layer
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+
+# Install whisperX from the local ./whisperX directory into the venv
+COPY ./whisperX /code
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install /code
+
+
+FROM python AS final
+
+# No models are baked into this image, so they are downloaded at run time.
+# TORCH_HOME / HF_HOME default to paths under /cache; create that directory
+# while still root and hand it to the runtime user so it is writable.
+RUN install -d -m 775 -o 1001 -g 0 /cache
+
+# Run as an unprivileged, fixed numeric UID
+USER 1001
+
+# Static ffmpeg / ffprobe binaries
+COPY --link --from=mwader/static-ffmpeg:6.0 /ffmpeg /usr/local/bin/
+COPY --link --from=mwader/static-ffmpeg:6.0 /ffprobe /usr/local/bin/
+
+# Virtual environment with torch and whisperX installed
+COPY --link --from=build /venv /venv
+
+WORKDIR /app
+
+# Export the cache locations to the runtime environment
+ARG TORCH_HOME
+ARG HF_HOME
+ENV TORCH_HOME=${TORCH_HOME}
+ENV HF_HOME=${HF_HOME}
+
+# Kept for build-argument compatibility with Dockerfile.ubi; the entrypoint
+# below does not read them, and both are empty unless passed via --build-arg.
+# NOTE(review): LANG is also the POSIX locale variable - confirm that setting
+# it here (possibly to an empty value) is intended.
+ARG WHISPER_MODEL
+ENV WHISPER_MODEL=${WHISPER_MODEL}
+ARG LANG
+ENV LANG=${LANG}
+
+STOPSIGNAL SIGINT
+
+# The script runs through `/bin/sh -c`, exactly as the former shell form did.
+# Arguments given to `docker run` are appended after the script, so the first
+# one becomes $0 and the rest become "$@" - hence the leading `--` in usage.
+# Quoting "$@" keeps arguments that contain spaces or glob characters intact,
+# and `exec` replaces the shell so whisperx receives STOPSIGNAL directly.
+ENTRYPOINT ["/bin/sh", "-c", "exec whisperx \"$@\""]
diff --git a/README.md b/README.md
@@ -95,6 +95,22 @@ docker run --gpus all -it -v ".:/app" whisperx:large-v3-ja -- --output_format sr
 > Remember to prepend `--` before the arguments.\
 > `--model` and `--language` args are defined in Dockerfile, no need to specify.
 
+### UBI9 Image
+
+I have created an alternative [Dockerfile.ubi](Dockerfile.ubi) that is based on the **Red Hat UBI** image, unlike the default one, which uses the **official Python image** as its base image. If you are a Red Hat customer, I believe you will appreciate its benefits.
+
+> With the release of the Red Hat Universal Base Image (UBI), you can now take advantage of the greater reliability, security, and performance of official Red Hat container images where OCI-compliant Linux containers run - whether you're a customer or not. --[Red Hat](https://www.redhat.com/en/blog/introducing-red-hat-universal-base-image)
+
+It is important to mention that you do *NOT* need to obtain a license from Red Hat to use UBI. However, if you are a subscriber and run it on RHEL/OpenShift, you can get support from Red Hat.
+
+Despite my initial hesitation, I made the decision not to utilize the UBI version as the default image. The *Python official image* has a significantly larger user base compared to *UBI*, and I believe that opting for it aligns better with public expectations. Nevertheless, I would still suggest giving the *UBI* version a try.
+
+You can get the pre-built image under the tag [ubi-no_model](https://ghcr.io/jim60105/whisperx:ubi-no_model). Note that only the no_model variant is published. Feel free to build your own image with [Dockerfile.ubi](Dockerfile.ubi) to suit your needs. This Dockerfile supports the same build arguments as the default one.
+
+```bash
+docker run --gpus all -it -v ".:/app" ghcr.io/jim60105/whisperx:ubi-no_model -- --model tiny --language en --output_format srt audio.mp3
+```
+
 ## LICENSE
 
 The main program, WhisperX, is distributed under [the BSD-4 license](https://github.com/m-bain/whisperX/blob/main/LICENSE).\

diff --git a/docker-bake.hcl b/docker-bake.hcl
@@ -74,4 +74,14 @@ target "no_model" {
   platforms  = ["linux/amd64", "linux/arm64"]
   cache-from = ["type=local,mode=max,src=cache"]
   cache-to   = ["type=local,mode=max,dest=cache"]
+}
+
+# UBI-based image without a preloaded model, built from Dockerfile.ubi-no_model
+target "ubi-no_model" {
+  dockerfile = "Dockerfile.ubi-no_model"
+  tags       = ["ghcr.io/jim60105/whisperx:ubi-no_model"]
+  platforms  = ["linux/amd64", "linux/arm64"]
+  cache-from = ["type=local,mode=max,src=cache"]
+  cache-to   = ["type=local,mode=max,dest=cache"]
 }