From ee74a60082b34c3a3d0df8a0d5d5cbfd7ec5ed6a Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:26:01 -0400 Subject: [PATCH] Simplify docker release process in the release pipeline (#9928) * simplify dockerfile, eliminate references to adapter repos as they will be handled in those repos * keep dbt-postgres target for historical releases of dbt-postgres * update third party image to pip install conditionally --- docker/Dockerfile | 137 +++++++++++----------------------------------- docker/README.md | 69 +++-------------------- 2 files changed, 41 insertions(+), 165 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0466c05aa93..10e63d3ec27 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,133 +1,60 @@ -## -# Generic dockerfile for dbt image building. -# See README for operational details -## - -# Top level build args -ARG build_for=linux/amd64 - -## -# base image (abstract) -## -# Please do not upgrade beyond python3.10.7 currently as dbt-spark does not support -# 3.11py and images do not get made properly -FROM --platform=$build_for python:3.10.7-slim-bullseye as base - -# N.B. The refs updated automagically every release via bumpversion -ARG dbt_core_ref=dbt-core@v1.8.0b2 -ARG dbt_postgres_ref=dbt-postgres@v1.8.0b2 -ARG dbt_redshift_ref=dbt-redshift@v1.8.0b2 -ARG dbt_bigquery_ref=dbt-bigquery@v1.8.0b2 -ARG dbt_snowflake_ref=dbt-snowflake@v1.8.0b2 -ARG dbt_spark_ref=dbt-spark@v1.8.0b2 -# special case args -ARG dbt_spark_version=all -ARG dbt_third_party +# this image gets published to GHCR for production use +ARG py_version=3.10.7 + +FROM python:$py_version-slim-bullseye as base -# System setup RUN apt-get update \ && apt-get dist-upgrade -y \ && apt-get install -y --no-install-recommends \ - git \ - ssh-client \ - software-properties-common \ - make \ - build-essential \ - ca-certificates \ - libpq-dev \ + build-essential=12.9 \ + ca-certificates=20210119 \ + git=1:2.30.2-1+deb11u2 \ + libpq-dev=13.14-0+deb11u1 \ + make=4.3-4.1 \ + openssh-client=1:8.4p1-5+deb11u3 \ + software-properties-common=0.96.20.2-2.1 \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ /tmp/* \ /var/tmp/* -# Env vars ENV PYTHONIOENCODING=utf-8 ENV LANG=C.UTF-8 -# Update python -RUN python -m pip install --upgrade pip setuptools wheel --no-cache-dir +RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir -# Set docker basics -WORKDIR /usr/app/dbt/ -ENTRYPOINT ["dbt"] -## -# dbt-core -## FROM base as dbt-core -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_core_ref}#egg=dbt-core&subdirectory=core" -## -# dbt-postgres -## -FROM base as dbt-postgres -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" +ARG commit_ref=main +HEALTHCHECK CMD dbt --version || exit 1 -## -# dbt-redshift -## -FROM base as dbt-redshift -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] +RUN python -m pip install --no-cache-dir "dbt-core @ git+https://github.com/dbt-labs/dbt-core@${commit_ref}#subdirectory=core" -## -# dbt-bigquery -## -FROM base as dbt-bigquery -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" +FROM base as dbt-postgres -## -# dbt-snowflake -## -FROM base as dbt-snowflake -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" +ARG commit_ref=main -## -# dbt-spark -## -FROM base as dbt-spark -RUN apt-get update \ - && apt-get dist-upgrade -y \ - && apt-get install -y --no-install-recommends \ - python-dev \ - libsasl2-dev \ - gcc \ - unixodbc-dev \ - && apt-get clean \ - && rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" +HEALTHCHECK CMD dbt --version || exit 1 + +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] + +RUN python -m pip install --no-cache-dir "dbt-postgres @ git+https://github.com/dbt-labs/dbt-core@${commit_ref}#subdirectory=plugins/postgres" -## -# dbt-third-party -## FROM dbt-core as dbt-third-party -RUN python -m pip install --no-cache-dir "${dbt_third_party}" -## -# dbt-all -## -FROM base as dbt-all -RUN apt-get update \ - && apt-get dist-upgrade -y \ - && apt-get install -y --no-install-recommends \ - python-dev \ - libsasl2-dev \ - gcc \ - unixodbc-dev \ - && apt-get clean \ - && rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" +ARG dbt_third_party + +RUN if [ "$dbt_third_party" ]; then \ + python -m pip install --no-cache-dir "${dbt_third_party}"; \ + else \ + echo "No third party adapter provided"; \ + fi \ diff --git a/docker/README.md b/docker/README.md index e4af582a29a..d05184146ed 100644 --- a/docker/README.md +++ b/docker/README.md @@ -5,13 +5,9 @@ This docker file is suitable for building dbt Docker images locally or using wit ## Building an image: This Dockerfile can create images for the following targets, each named after the database they support: * `dbt-core` _(no db-adapter support)_ -* `dbt-postgres` -* `dbt-redshift` -* `dbt-bigquery` -* `dbt-snowflake` -* `dbt-spark` * `dbt-third-party` _(requires additional build-arg)_ -* `dbt-all` _(installs all of the above in a single image)_ + +For platform-specific images, please refer to that platform's repository (eg. `dbt-labs/dbt-postgres`) In order to build a new image, run the following docker command. ``` @@ -22,53 +18,27 @@ docker build --tag --target --- -By default the images will be populated with the most recent release of `dbt-core` and whatever database adapter you select. If you need to use a different version you can specify it by git ref using the `--build-arg` flag: +By default the images will be populated with `dbt-core` on `main`. +If you need to use a different version you can specify it by git ref (tag, branch, sha) using the `--build-arg` flag: ``` docker build --tag \ --target \ - --build-arg = \ + --build-arg commit_ref= \ ``` -valid arg names for versioning are: -* `dbt_core_ref` -* `dbt_postgres_ref` -* `dbt_redshift_ref` -* `dbt_bigquery_ref` -* `dbt_snowflake_ref` -* `dbt_spark_ref` - ---- ->**NOTE:** Only override a _single_ build arg for each build. Using multiple overrides may lead to a non-functioning image. ---- - -If you wish to build an image with a third-party adapter you can use the `dbt-third-party` target. This target requires you provide a path to the adapter that can be processed by `pip` by using the `dbt_third_party` build arg: +If you wish to build an image with a third-party adapter you can use the `dbt-third-party` target. +This target requires you provide a path to the adapter that can be processed by `pip` by using the `dbt_third_party` build arg: ``` docker build --tag \ --target dbt-third-party \ --build-arg dbt_third_party= \ ``` +This can also be combined with the `commit_ref` build arg to specify a version of `dbt-core`. ### Examples: -To build an image named "my-dbt" that supports redshift using the latest releases: -``` -cd dbt-core/docker -docker build --tag my-dbt --target dbt-redshift . -``` - -To build an image named "my-other-dbt" that supports bigquery using `dbt-core` version 0.21.latest and the bigquery adapter version 1.0.0b1: -``` -cd dbt-core/docker -docker build \ - --tag my-other-dbt \ - --target dbt-bigquery \ - --build-arg dbt_bigquery_ref=dbt-bigquery@v1.0.0b1 \ - --build-arg dbt_core_ref=dbt-core@0.21.latest \ - . -``` - -To build an image named "my-third-party-dbt" that uses [Materilize third party adapter](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) and the latest release of `dbt-core`: +To build an image named "my-third-party-dbt" that uses the latest release of [Materialize third party adapter](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) and the latest dev version of `dbt-core`: ``` cd dbt-core/docker docker build --tag my-third-party-dbt \ @@ -78,27 +48,6 @@ docker build --tag my-third-party-dbt \ ``` -## Special cases -There are a few special cases worth noting: -* The `dbt-spark` database adapter comes in three different versions named `PyHive`, `ODBC`, and the default `all`. If you wish to overide this you can use the `--build-arg` flag with the value of `dbt_spark_version=`. See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information. - -``` -docker build --tag my_dbt \ - --target dbt-postgres \ - --build-arg dbt_postgres_ref=dbt-core@1.0.0b1 \ - -``` - -* If you need to build against another architecture (linux/arm64 in this example) you can overide the `build_for` build arg: -``` -docker build --tag my_dbt \ - --target dbt-postgres \ - --build-arg build_for=linux/arm64 \ - -``` - -Supported architectures can be found in the python docker [dockerhub page](https://hub.docker.com/_/python). - ## Running an image in a container: The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: ```