diff --git a/.changes/unreleased/Security-20230817-145626.yaml b/.changes/unreleased/Security-20230817-145626.yaml
new file mode 100644
index 000000000..4add88cbc
--- /dev/null
+++ b/.changes/unreleased/Security-20230817-145626.yaml
@@ -0,0 +1,6 @@
+kind: Security
+body: Add docker image to the repo
+time: 2023-08-17T14:56:26.361208+02:00
+custom:
+  Author: Fokko
+  PR: "876"
diff --git a/docker-compose.yml b/docker-compose.yml
index 9bc9e509c..ad083eaf4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ version: "3.7"
 services:
 
   dbt-spark3-thrift:
-    image: godatadriven/spark:3.1.1
+    build: docker/
     ports:
       - "10000:10000"
       - "4040:4040"
@@ -19,7 +19,7 @@ services:
       - WAIT_FOR=dbt-hive-metastore:5432
 
   dbt-hive-metastore:
-    image: postgres:9.6.17-alpine
+    image: postgres:9-alpine
     volumes:
       - ./.hive-metastore/:/var/lib/postgresql/data
     environment:
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..bb4d378ed
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,42 @@
+# Apache Spark runtime image built on the Eclipse Temurin JRE.
+# The docker-compose service dbt-spark3-thrift builds from this directory.
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG BUILD_DATE
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+
+LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.version=$SPARK_VERSION
+
+ENV SPARK_HOME=/usr/spark
+ENV PATH="${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${PATH}"
+
+# Install runtime packages, fetch and unpack Spark, link the PostgreSQL JDBC
+# driver into Spark's jars, then drop wget and the apt caches in the same layer.
+# netcat-openbsd supplies the nc binary used by entrypoint.sh; the bare "netcat"
+# name is a virtual package without an install candidate on newer Ubuntu releases.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libpostgresql-jdbc-java \
+        netcat-openbsd \
+        procps \
+        wget && \
+    wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}" && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar "${SPARK_HOME}/jars/postgresql-jdbc4.jar" && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 000000000..6a7591389
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Container entrypoint: optionally block until dependent services accept TCP
+# connections, then hand control to spark-submit with the container arguments.
+#
+# WAIT_FOR is a ';'-separated list of host:port pairs, e.g.
+#   WAIT_FOR=dbt-hive-metastore:5432
+
+if [ -n "$WAIT_FOR" ]; then
+  IFS=';' read -r -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
+  for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
+  do
+    # Host is everything before the last ':'; port is everything after the first ':'.
+    WAIT_FOR_HOST=${HOSTPORT%:*}
+    WAIT_FOR_PORT=${HOSTPORT#*:}
+
+    echo "Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT..."
+    while ! nc -z "$WAIT_FOR_HOST" "$WAIT_FOR_PORT"; do echo sleeping; sleep 2; done
+  done
+fi
+
+# exec replaces this shell, so spark-submit receives container signals directly.
+exec spark-submit "$@"