Skip to content

Commit

Permalink
Merge branch 'master' into fix/ING-691/mode-ingestion-failure
Browse files Browse the repository at this point in the history
  • Loading branch information
pedro93 authored Sep 20, 2024
2 parents 7cf57da + 9eefedf commit a035609
Show file tree
Hide file tree
Showing 839 changed files with 196,427 additions and 14,178 deletions.
3 changes: 2 additions & 1 deletion .github/actions/ci-optimization/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ runs:
- "metadata-ingestion-modules/**"
- "metadata-ingestion/**"
- "metadata-models/**"
- "docker/datahub-ingestion**"
- "docker/datahub-ingestion-base/**"
- "docker/datahub-ingestion/**"
ingestion-base:
- "docker/datahub-ingestion-base/**"
docker:
Expand Down
86 changes: 60 additions & 26 deletions .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,21 @@ inputs:
description: "Main tag to use for the Docker image"
required: true
flavor:
description: 'Image flavor (e.g., slim, full)'
description: "Image flavor (e.g., slim, full)"
required: false
target:
description: "Sets the target stage to build"
required: false
depot-project:
# Setting this will use native arm64 docker builds instead of QEMU emulation.
# This speeds up builds by 2-3x.
description: "Depot project id"
required: false

outputs:
image_tag:
description: "Docker image tags"
value: ${{ steps.docker_meta.outputs.tags }}
# image_name: ${{ env.DATAHUB_GMS_IMAGE }}

runs:
using: "composite"
Expand All @@ -53,15 +58,27 @@ runs:
images: ${{ inputs.images }}
flavor: |
latest=false
suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
tags: |
type=raw,value=${{ inputs.image_tag }}
type=raw,value=head,enable={{is_default_branch}}
type=sha,prefix=,format=short
type=raw,value=head,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }},enable={{is_default_branch}}
type=sha,prefix=,format=short,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
# Reduces the (possibly multi-entry) `images` and `image_tag` inputs to their
# first entry and publishes two step outputs:
#   SINGLE_IMAGE - first image name
#   SINGLE_TAG   - "<first image>:<first tag>"
# Entries are split on spaces or commas (awk -F' |,'); newlines are turned
# into spaces first.
- name: Single Tag
id: single_tag
shell: bash
# NOTE(review): $IMAGES / $TAGS are expanded unquoted inside the script. That
# looks load-bearing: word splitting drops the newlines surrounding the
# interpolated input, so awk's $1 is the first entry. Quoting the expansions
# would leave a leading separator and make $1 empty - confirm before changing.
# NOTE(review): the inputs are interpolated directly into the script text, so
# they are assumed to be trusted, workflow-controlled values.
run: |
IMAGES="""
${{ inputs.images }}
"""
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_IMAGE=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
# Code for testing the build when not pushing to Docker Hub.
- name: Build and Load image for testing (if not publishing)
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
if: ${{ inputs.publish != 'true' }}
with:
context: ${{ inputs.context }}
Expand All @@ -74,20 +91,11 @@ runs:
target: ${{ inputs.target }}
load: true
push: false
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
- name: Single Tag
if: ${{ inputs.publish != 'true' }}
shell: bash
run: |
IMAGES="""
${{ inputs.images }}
"""
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> $GITHUB_OUTPUT
id: single_tag
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Upload image locally for testing (if not publishing)
uses: ishworkh/docker-image-artifact-upload@v1
if: ${{ inputs.publish != 'true' }}
Expand All @@ -97,19 +105,42 @@ runs:
# Code for building multi-platform images and pushing to Docker Hub.
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
if: ${{ inputs.publish == 'true' }}
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
if: ${{ inputs.publish == 'true' }}
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Setup Depot CLI
uses: depot/setup-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
- name: Login to DockerHub
uses: docker/login-action@v3
if: ${{ inputs.publish == 'true' }}
with:
username: ${{ inputs.username }}
password: ${{ inputs.password }}

# Depot variant.
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v5
if: ${{ inputs.publish == 'true' }}
uses: depot/build-push-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
with:
project: ${{ inputs.depot-project }}
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v6
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
Expand All @@ -118,7 +149,10 @@ runs:
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
# TODO add code for vuln scanning?
10 changes: 5 additions & 5 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ jobs:
extra_pip_requirements: "apache-airflow~=2.4.3"
extra_pip_extras: plugin-v2,test-airflow24
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: 'apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt'
extra_pip_requirements: "apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: 'apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt'
extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt"
extra_pip_extras: plugin-v2
fail-fast: false
steps:
Expand All @@ -73,7 +73,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:build
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dagster-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:dagster-plugin:lint :metadata-ingestion-modules:dagster-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
with:
Expand Down
65 changes: 42 additions & 23 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ env:
DATAHUB_INGESTION_BASE_IMAGE: "acryldata/datahub-ingestion-base"
DATAHUB_INGESTION_IMAGE: "acryldata/datahub-ingestion"

# Workflow-level GITHUB_TOKEN permissions: read-only access to repository
# contents, plus the ability to request an OIDC ID token.
# NOTE(review): `id-token: write` is presumably needed by the Depot-based
# builds enabled through `depot-project` - confirm. Jobs that declare their
# own `permissions` block replace these defaults.
permissions:
contents: read
id-token: write

jobs:
setup:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -68,23 +72,23 @@ jobs:
id: tag
run: |
source .github/scripts/docker_helpers.sh
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "tag=$(get_tag)" >> $GITHUB_OUTPUT
echo "slim_tag=$(get_tag_slim)" >> $GITHUB_OUTPUT
echo "full_tag=$(get_tag_full)" >> $GITHUB_OUTPUT
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag_slim)" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag_full)" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
echo "repository_name=${GITHUB_REPOSITORY#*/}" >> $GITHUB_OUTPUT
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
echo "tag=$(get_tag)" >> "$GITHUB_OUTPUT"
echo "slim_tag=$(get_tag_slim)" >> "$GITHUB_OUTPUT"
echo "full_tag=$(get_tag_full)" >> "$GITHUB_OUTPUT"
echo "unique_tag=$(get_unique_tag)" >> "$GITHUB_OUTPUT"
echo "unique_slim_tag=$(get_unique_tag_slim)" >> "$GITHUB_OUTPUT"
echo "unique_full_tag=$(get_unique_tag_full)" >> "$GITHUB_OUTPUT"
echo "python_release_version=$(get_python_docker_release_v)" >> "$GITHUB_OUTPUT"
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
echo "repository_name=${GITHUB_REPOSITORY#*/}" >> "$GITHUB_OUTPUT"
- name: Check whether docker login is possible
id: docker-login
env:
ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
run: |
echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> $GITHUB_OUTPUT
echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
- name: Check whether publishing enabled
id: publish
env:
Expand All @@ -95,7 +99,7 @@ jobs:
}}
run: |
echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
- name: Check whether PR publishing enabled
id: pr-publish
env:
Expand All @@ -106,7 +110,7 @@ jobs:
}}
run: |
echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
- uses: ./.github/actions/ci-optimization
id: ci-optimize
- uses: actions/setup-python@v5
Expand Down Expand Up @@ -543,9 +547,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion-base/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute DataHub Ingestion (Base) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_base_slim_build:
name: Build and Push DataHub Ingestion (Base-Slim) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -575,6 +580,7 @@ jobs:
images: |
${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
image_tag: ${{ needs.setup.outputs.slim_tag }}
flavor: slim
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
build-args: |
Expand All @@ -584,9 +590,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion-base/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute DataHub Ingestion (Base-Slim) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_base_full_build:
name: Build and Push DataHub Ingestion (Base-Full) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -627,7 +634,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute DataHub Ingestion (Base-Full) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> "$GITHUB_OUTPUT"

datahub_ingestion_slim_build:
name: Build and Push DataHub Ingestion Docker Images
Expand Down Expand Up @@ -673,15 +680,17 @@ jobs:
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
APP_ENV=slim
image_tag: ${{ needs.setup.outputs.slim_tag }}
flavor: slim
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_slim_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -711,6 +720,7 @@ jobs:
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
timeout: 15m
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
Expand Down Expand Up @@ -765,9 +775,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_full_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -797,6 +808,7 @@ jobs:
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
timeout: 15m
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
Expand All @@ -811,13 +823,13 @@ jobs:
- id: set-matrix
run: |
if [ '${{ needs.setup.outputs.frontend_only }}' == 'true' ]; then
echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
echo 'matrix=["cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> "$GITHUB_OUTPUT"
elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
else
echo 'matrix=[]' >> $GITHUB_OUTPUT
echo 'matrix=[]' >> "$GITHUB_OUTPUT"
fi
smoke_test:
Expand Down Expand Up @@ -956,6 +968,13 @@ jobs:
docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head'
docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then
echo 'datahub-integration-service head images'
docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head'
docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
- name: CI Slim Head Images
run: |
if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then
echo 'datahub-ingestion head-slim images'
docker pull '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim'
Expand All @@ -971,7 +990,7 @@ jobs:
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }}
ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }}
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5"
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
run: |
./smoke-test/run-quickstart.sh
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && uv pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
with:
Expand Down
Loading

0 comments on commit a035609

Please sign in to comment.