-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CHORE] bring up fixtures for iceberg (#1527)
* Adds fixtures (Minio / Iceberg Rest server / Spark) to enable iceberg integration tests
- Loading branch information
Showing
28 changed files
with
780 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,7 +111,7 @@ jobs: | |
- name: Spin up IO services | ||
uses: isbang/[email protected] | ||
with: | ||
compose-file: ./tests/integration/docker-compose/docker-compose.yml | ||
compose-file: ./tests/integration/io/docker-compose/docker-compose.yml | ||
down-flags: --volumes | ||
- name: Run IO integration tests | ||
run: | | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -307,7 +307,7 @@ jobs: | |
- name: Spin up IO services | ||
uses: isbang/[email protected] | ||
with: | ||
compose-file: ./tests/integration/docker-compose/docker-compose.yml | ||
compose-file: ./tests/integration/io/docker-compose/docker-compose.yml | ||
down-flags: --volumes | ||
- name: Run IO integration tests | ||
run: | | ||
|
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
FROM python:3.9-bullseye

# OS-level tooling for the image. update, install and cleanup share one layer
# so the apt package lists never persist in the image.
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        build-essential \
        curl \
        openjdk-11-jdk \
        software-properties-common \
        ssh \
        sudo \
        unzip \
        vim \
    && apt-get -qq clean \
    && rm -rf /var/lib/apt/lists/*
|
||
# Install locations. Nothing earlier in this Dockerfile defines SPARK_HOME or
# HADOOP_HOME, so the previous ${VAR:-default} form always resolved to these
# defaults; state them directly.
ENV SPARK_HOME="/opt/spark" \
    HADOOP_HOME="/opt/hadoop"

# Make PySpark and its bundled py4j importable. No ":$PYTHONPATH" suffix is
# appended: nothing in this Dockerfile sets PYTHONPATH beforehand (assumes the
# base image leaves it unset), so the suffix expanded to a trailing ":" --
# an empty search-path entry that Python resolves to the current directory.
ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip"

RUN mkdir -p "${HADOOP_HOME}" "${SPARK_HOME}" /home/iceberg/spark-events
WORKDIR ${SPARK_HOME}
|
||
# Pinned component versions used by the download and pip steps below.
ENV SPARK_VERSION=3.4.1 \
    ICEBERG_SPARK_RUNTIME_VERSION=3.4_2.12 \
    ICEBERG_VERSION=1.4.0 \
    AWS_SDK_VERSION=2.20.18 \
    PYICEBERG_VERSION=0.4.0

# Fetch Spark from archive.apache.org: the dlcdn.apache.org mirror only keeps
# the currently supported releases, so a pinned version disappears from it once
# it is superseded. --fail turns an HTTP error into a build failure instead of
# saving the error page as the tarball.
RUN curl --retry 3 -fsSL -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
 && tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
 && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
|
||
# Download iceberg spark runtime directly into Spark's jars directory.
# --fail is required here: without it an HTTP error (e.g. a 404 for a wrong
# version) is written to disk as the ".jar" and the build succeeds with a
# broken classpath.
RUN curl --retry 3 -fsSL https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
      -o /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar

# Download AWS bundle (same failure handling as above).
RUN curl --retry 3 -fsSL https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
      -o /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
|
||
# Ship the Spark defaults and put Spark's launcher scripts on PATH.
COPY spark-defaults.conf /opt/spark/conf
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"

# Ensure every launcher script is executable by its owner.
RUN chmod u+x /opt/spark/sbin/* /opt/spark/bin/*
|
||
# Python tooling. --no-cache-dir keeps pip's download cache out of the image
# layers; the cache is never reused inside a built image.
RUN pip3 install -q --no-cache-dir ipython

# PyIceberg with the s3fs extra, pinned through PYICEBERG_VERSION.
RUN pip3 install --no-cache-dir "pyiceberg[s3fs]==${PYICEBERG_VERSION}"
|
||
# Startup script and provisioning script land in the current WORKDIR.
COPY entrypoint.sh provision.py ./

# The container always starts through entrypoint.sh; CMD supplies its default
# argument.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["notebook"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env bash | ||
|
||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
|
||
# Ensure the Apache RAT jar exists at "$rat_jar" and is a readable archive.
#
# Reads the globals RAT_VERSION (release to fetch) and rat_jar (destination
# path). If the jar is missing it is downloaded with curl, falling back to
# wget, into a ".part" file that is renamed only after a successful transfer.
# The jar is then verified with `unzip -tq`.
#
# Exits the script with status 1 when neither curl nor wget is available, or
# when the jar fails verification (the bad file is removed first).
acquire_rat_jar () {

  URL="https://repo.maven.apache.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"

  JAR="$rat_jar"

  # Download rat launch jar if it hasn't been downloaded yet
  if [ ! -f "$JAR" ]; then
    printf "Attempting to fetch rat\n"
    JAR_DL="${JAR}.part"
    if command -v curl > /dev/null 2>&1; then
      # --fail: an HTTP error must not be saved and promoted to the final jar.
      curl -L --fail --silent "${URL}" > "$JAR_DL" && mv "$JAR_DL" "$JAR"
    elif command -v wget > /dev/null 2>&1; then
      wget --quiet "${URL}" -O "$JAR_DL" && mv "$JAR_DL" "$JAR"
    else
      printf "You do not have curl or wget installed, please install rat manually.\n"
      # Exit statuses must be in 0-255; "exit -1" is not portable.
      exit 1
    fi
  fi

  # Integrity check: a truncated or non-archive download fails here.
  if ! unzip -tq "$JAR" &> /dev/null; then
    # We failed to download. -f: the jar or the partial file may not exist.
    rm -f "$JAR" "${JAR}.part"
    # Pass the path as an argument so '%' or '\' in it is not read as format.
    printf 'Our attempt to download rat locally to %s failed. Please install rat manually.\n' "$JAR"
    exit 1
  fi
}
|
||
# Go to the project root directory (the parent of this script's directory).
# Abort if it cannot be resolved instead of running the check from wherever
# the caller happened to be.
FWDIR="$(cd "$(dirname "$0")"/.. && pwd)" || exit 1
cd "$FWDIR" || exit 1

# Prefer the JVM from JAVA_HOME when it is executable, else rely on PATH.
if test -x "$JAVA_HOME/bin/java"; then
    declare java_cmd="$JAVA_HOME/bin/java"
else
    declare java_cmd=java
fi

export RAT_VERSION=0.15
export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar
mkdir -p "$FWDIR"/lib

[[ -f "$rat_jar" ]] || acquire_rat_jar || {
    echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
    exit 1
}

mkdir -p build

# "$java_cmd" is quoted so a JAVA_HOME containing spaces is not word-split.
if ! "$java_cmd" -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > build/rat-results.txt; then
   echo "RAT exited abnormally"
   exit 1
fi

# Report lines containing "??" are treated as files without a license header.
ERRORS="$(grep -e "??" build/rat-results.txt)"

if [ -n "$ERRORS" ]; then
    echo "Could not find Apache license headers in the following files:"
    echo "$ERRORS"
    exit 1
else
    echo "RAT checks passed."
fi
26 changes: 26 additions & 0 deletions
26
tests/integration/iceberg/docker-compose/docker-compose-azurite.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
version: '3'

services:
  # Azurite blob endpoint, published on host port 10000.
  azurite:
    image: mcr.microsoft.com/azure-storage/azurite
    container_name: azurite
    hostname: azurite
    ports:
      - "10000:10000"
    command: ["azurite-blob", "--loose", "--blobHost", "0.0.0.0"]
30 changes: 30 additions & 0 deletions
30
tests/integration/iceberg/docker-compose/docker-compose-gcs-server.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
version: '3'

services:
  # fake-gcs-server over plain HTTP, published on host port 4443.
  gcs-server:
    image: fsouza/fake-gcs-server
    container_name: gcs-server
    ports:
      - "4443:4443"
    # Create /data/warehouse before the server starts serving from /data.
    entrypoint: >
      /bin/sh -c "
      mkdir -p /data/warehouse;
      /bin/fake-gcs-server -data /data -scheme http;
      exit 0;
      "
88 changes: 88 additions & 0 deletions
88
tests/integration/iceberg/docker-compose/docker-compose.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
version: '3'

services:
  # Spark image built from the Dockerfile in this directory.
  spark-iceberg:
    image: python-integration
    container_name: pyiceberg-spark
    build: .
    networks:
      iceberg_net:
    depends_on:
      - rest
      - minio
    volumes:
      - ./warehouse:/home/iceberg/warehouse
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    ports:
      - 8888:8888
      - 8080:8080
    links:
      - rest:rest
      - minio:minio
  # Iceberg REST catalog; table data goes to the MinIO bucket via S3FileIO.
  rest:
    image: tabulario/iceberg-rest
    container_name: pyiceberg-rest
    networks:
      iceberg_net:
    ports:
      - 8181:8181
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3://warehouse/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000
  # S3-compatible object store. The network alias lets the virtual-host-style
  # bucket name "warehouse.minio" resolve to this container.
  minio:
    image: minio/minio
    container_name: pyiceberg-minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      iceberg_net:
        aliases:
          - warehouse.minio
    ports:
      - 9000:9000
    command: [server, /data]
  # One-shot bucket setup: waits for MinIO, creates the "warehouse" bucket and
  # opens it for anonymous access, then idles so the container stays up.
  mc:
    depends_on:
      - minio
    image: minio/mc
    container_name: pyiceberg-mc
    networks:
      iceberg_net:
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    # Uses `mc alias set` / `mc anonymous set`: the older `mc config host add`
    # and `mc policy set` spellings are not accepted by current mc releases,
    # and this image is not pinned to an old one.
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc anonymous set public minio/warehouse;
      tail -f /dev/null
      "
networks:
  iceberg_net:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
|
||
# Port the standalone master listens on, and the URL the worker registers with.
master_port=7077
master_url="spark://spark-iceberg:${master_port}"

# Bring up master, worker and history server, then run the provisioning script.
start-master.sh -p "${master_port}"
start-worker.sh "${master_url}"
start-history-server.sh
python3 provision.py

# Block forever so the container keeps running after setup finishes.
tail -f /dev/null
Oops, something went wrong.