From efa1b1868424f80534ba6ee5d20a72f9e2343947 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Fri, 1 Sep 2023 01:38:27 +0200
Subject: [PATCH] Add docker image to the repo (#876)

I think it makes sense to have the Dockerfile in the repository itself.

Resolves #739

Co-authored-by: Anders
Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
Reviewer notes (this section is not part of the commit message):
* Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch; indentation of context lines is inferred and should
  be checked against the target tree. Author e-mail addresses were absent
  from that copy and have not been reconstructed.
* docker/Dockerfile: ENV now uses the key=value form, the Spark tarball is
  fetched over HTTPS instead of HTTP, and the apt package lists are removed
  in the same layer that created them.
* docker/entrypoint.sh: `read` uses -r, and the host/port expansions are
  quoted so they are not word-split or globbed.
* Every revision is line-for-line, so the hunk headers and the diffstat are
  unchanged. The post-image blob ids on the `index` lines for
  docker/Dockerfile and docker/entrypoint.sh predate these revisions.
* TODO(review): the Spark download is still not checksum-verified.

 .../unreleased/Security-20230817-145626.yaml  |  6 ++++
 docker-compose.yml                            |  4 +--
 docker/Dockerfile                             | 30 +++++++++++++++++++
 docker/entrypoint.sh                          | 15 ++++++++++
 4 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Security-20230817-145626.yaml
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/entrypoint.sh

diff --git a/.changes/unreleased/Security-20230817-145626.yaml b/.changes/unreleased/Security-20230817-145626.yaml
new file mode 100644
index 000000000..4add88cbc
--- /dev/null
+++ b/.changes/unreleased/Security-20230817-145626.yaml
@@ -0,0 +1,6 @@
+kind: Security
+body: Add docker image to the repo
+time: 2023-08-17T14:56:26.361208+02:00
+custom:
+  Author: Fokko
+  PR: "876"
diff --git a/docker-compose.yml b/docker-compose.yml
index 9bc9e509c..ad083eaf4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ version: "3.7"
 services:
 
   dbt-spark3-thrift:
-    image: godatadriven/spark:3.1.1
+    build: docker/
     ports:
       - "10000:10000"
       - "4040:4040"
@@ -19,7 +19,7 @@ services:
       - WAIT_FOR=dbt-hive-metastore:5432
 
   dbt-hive-metastore:
-    image: postgres:9.6.17-alpine
+    image: postgres:9-alpine
     volumes:
       - ./.hive-metastore/:/var/lib/postgresql/data
     environment:
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..bb4d378ed
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,30 @@
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG BUILD_DATE
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+
+LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.version=$SPARK_VERSION
+
+ENV SPARK_HOME=/usr/spark
+ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+    wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 000000000..6a7591389
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ -n "$WAIT_FOR" ]; then
+  IFS=';' read -r -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
+  for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
+  do
+    WAIT_FOR_HOST=${HOSTPORT%:*}
+    WAIT_FOR_PORT=${HOSTPORT#*:}
+
+    echo "Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT..."
+    while ! nc -z "$WAIT_FOR_HOST" "$WAIT_FOR_PORT"; do echo sleeping; sleep 2; done
+  done
+fi
+
+exec spark-submit "$@"