Skip to content

Commit

Permalink
feat(actions): add docs propagation action for columns (#130)
Browse files Browse the repository at this point in the history
  • Loading branch information
shirshanka authored Sep 11, 2024
1 parent 65b6a43 commit 36aa93e
Show file tree
Hide file tree
Showing 28 changed files with 1,997 additions and 44 deletions.
130 changes: 99 additions & 31 deletions .github/workflows/datahub-actions-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,25 @@ jobs:
regular_image:
name: Build & Push Image to DockerHub
runs-on: ubuntu-latest
if: ${{ needs.setup.outputs.publish == 'true' }}
needs: setup
steps:
- name: Check out the repo
uses: actions/checkout@v3
- name: Docker meta
id: docker_meta
uses: crazy-max/ghaction-docker-meta@v1
uses: docker/metadata-action@v5
with:
images: |
acryldata/datahub-actions
tag-custom: ${{ needs.setup.outputs.tag }}
tag-custom-only: true
tags: |
type=raw,value=${{ needs.setup.outputs.tag }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
if: ${{ needs.setup.outputs.publish == 'true' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
if: ${{ needs.setup.outputs.publish == 'true' }}
uses: docker/login-action@v2
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
Expand All @@ -74,56 +75,60 @@ jobs:
uses: docker/build-push-action@v6
with:
file: ./docker/datahub-actions/Dockerfile
platforms: linux/amd64,linux/arm64
platforms: ${{ needs.setup.outputs.publish == 'true' && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
tags: ${{ steps.docker_meta.outputs.tags }}
push: ${{ needs.setup.outputs.publish == 'true' }}
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
target: final
build-args:
"GEM_FURY_TOKEN=${{ secrets.GEMFURY_PULL_TOKEN }}"
build-args: 'GEM_FURY_TOKEN=${{ secrets.GEMFURY_PULL_TOKEN }}'
slim_image:
name: Build & Push Image to DockerHub (slim)
runs-on: ubuntu-latest
if: ${{ needs.setup.outputs.publish == 'true' }}
needs: setup
steps:
- name: Check out the repo (slim)
uses: actions/checkout@v3
- name: Docker meta (slim)
id: docker_meta
uses: crazy-max/ghaction-docker-meta@v1
id: docker_meta_slim
uses: docker/metadata-action@v5
with:
images: |
acryldata/datahub-actions
tag-custom: ${{ needs.setup.outputs.tag }}
tag-custom-only: true
acryldata/datahub-actions-slim
tags: |
type=raw,value=${{ needs.setup.outputs.tag }}
- name: Set up QEMU (slim)
if: ${{ needs.setup.outputs.publish == 'true' }}
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx (slim)
uses: docker/setup-buildx-action@v2
- name: Login to DockerHub (slim)
uses: docker/login-action@v2
if: ${{ needs.setup.outputs.publish == 'true' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
- name: Docker meta (slim)
id: docker_meta_slim
uses: crazy-max/ghaction-docker-meta@v1
with:
images: |
acryldata/datahub-actions-slim
tag-custom: ${{ needs.setup.outputs.tag }}
tag-custom-only: true
- name: Build & Push Image (slim)
uses: docker/build-push-action@v6
with:
file: ./docker/datahub-actions/Dockerfile
platforms: linux/amd64,linux/arm64
platforms: ${{ needs.setup.outputs.publish == 'true' && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
tags: ${{ steps.docker_meta_slim.outputs.tags }}
push: ${{ needs.setup.outputs.publish == 'true' }}
target: final
load: ${{ needs.setup.outputs.publish != 'true' }}
build-args: |
"APP_ENV=prod-slim"
"GEM_FURY_TOKEN=${{ secrets.GEMFURY_PULL_TOKEN }}"
- name: Save Docker image
if: needs.setup.outputs.publish != 'true'
run: docker save ${{ steps.docker_meta_slim.outputs.tags }} > image.tar
- name: Upload artifact
if: needs.setup.outputs.publish != 'true'
uses: actions/upload-artifact@v3
with:
name: docker-image
path: image.tar
# image_scan:
# permissions:
# contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -151,7 +156,7 @@ jobs:
# output: 'trivy-results.sarif'
# severity: 'CRITICAL,HIGH'
# ignore-unfixed: true
# vuln-type: "os,library"
# vuln-type: "os,library"
# - name: Upload Trivy scan results to GitHub Security tab
# uses: github/codeql-action/upload-sarif@v2
# with:
Expand All @@ -161,15 +166,23 @@ jobs:
contents: read # for actions/checkout to fetch code
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
name: "[Monitoring] Scan slim action images for vulnerabilities"
name: '[Monitoring] Scan slim action images for vulnerabilities'
runs-on: ubuntu-latest
needs: [setup, slim_image]
steps:
- name: Checkout # adding checkout step just to make trivy upload happy
uses: actions/checkout@v3
- name: Download image (slim)
uses: actions/checkout@v4
- name: Download artifact (if not publishing)
if: needs.setup.outputs.publish != 'true'
uses: actions/download-artifact@v3
with:
name: docker-image
- name: Load Docker image (if not publishing)
if: needs.setup.outputs.publish != 'true'
run: docker load < image.tar
- name: Download image (if publishing)
if: needs.setup.outputs.publish == 'true'
uses: ishworkh/docker-image-artifact-download@v1
if: ${{ needs.setup.outputs.publish != 'true' }}
with:
image: acryldata/datahub-actions-slim:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner (slim)
Expand All @@ -183,8 +196,63 @@ jobs:
output: 'trivy-results.sarif'
severity: 'CRITICAL,HIGH'
ignore-unfixed: true
vuln-type: "os,library"
vuln-type: 'os,library'
- name: Upload Trivy scan results to GitHub Security tab (slim)
uses: github/codeql-action/upload-sarif@v2
with:
sarif_file: 'trivy-results.sarif'
sarif_file: 'trivy-results.sarif'
smoke_test:
  # Runs the repository smoke tests against the slim actions image
  # (acryldata/datahub-actions-slim) after the setup and slim_image jobs.
  name: Run Smoke Tests
  runs-on: ubuntu-latest
  needs: [setup, slim_image]
  steps:
    - name: Free up disk space
      # `-y` is needed because the runner has no interactive terminal:
      # without it apt-get stops at the confirmation prompt and the
      # trailing `|| true` hides the failure, so no space is reclaimed.
      run: |
        sudo apt-get remove -y 'dotnet-*' azure-cli || true
        sudo rm -rf /usr/local/lib/android/ || true
        sudo docker image prune -a -f || true
    - name: Disk Check
      run: df -h . && docker images
    - name: Checkout the repo
      uses: actions/checkout@v4
    - name: Set up JDK 17
      uses: actions/setup-java@v3
      with:
        distribution: 'zulu'
        java-version: 17
    - uses: gradle/actions/setup-gradle@v3
    - uses: actions/setup-python@v4
      with:
        python-version: '3.10'
        cache: 'pip'
    # Non-publishing runs: the image arrives as the `docker-image`
    # artifact (image.tar) and is loaded into the local Docker daemon.
    - name: Download artifact (if not publishing)
      if: needs.setup.outputs.publish != 'true'
      uses: actions/download-artifact@v3
      with:
        name: docker-image
    - name: Load Docker image (if not publishing)
      if: needs.setup.outputs.publish != 'true'
      run: docker load < image.tar
    # Publishing runs: fetch the slim image tagged with the unique tag.
    - name: Download image (if publishing)
      if: needs.setup.outputs.publish == 'true'
      uses: ishworkh/docker-image-artifact-download@v1
      with:
        image: acryldata/datahub-actions-slim:${{ needs.setup.outputs.unique_tag }}
    - name: run quickstart
      env:
        DATAHUB_TELEMETRY_ENABLED: false
        DATAHUB_ACTIONS_IMAGE: acryldata/datahub-actions-slim
        DATAHUB_ACTIONS_VERSION: ${{ needs.setup.outputs.unique_tag }}
        ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5'
        ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
      run: |
        ./smoke-test/run-quickstart.sh
    - name: Disk Check
      run: df -h . && docker images
    - name: Smoke test
      env:
        # Quickstart was already started by the `run quickstart` step.
        RUN_QUICKSTART: false
        DATAHUB_ACTIONS_VERSION: ${{ needs.setup.outputs.unique_tag }}
      run: |
        echo "$DATAHUB_ACTIONS_VERSION"
        ./smoke-test/smoke.sh
6 changes: 5 additions & 1 deletion datahub-actions/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_long_description():
return description


acryl_datahub_min_version = os.environ.get("ACRYL_DATAHUB_MIN_VERSION") or "0.12.1.5"
acryl_datahub_min_version = os.environ.get("ACRYL_DATAHUB_MIN_VERSION") or "0.13.3.6rc1"

base_requirements = {
f"acryl-datahub[datahub-kafka]>={acryl_datahub_min_version}",
Expand Down Expand Up @@ -86,6 +86,7 @@ def get_long_description():
"snowflake_tag_propagation": {
f"acryl-datahub[snowflake]>={acryl_datahub_min_version}"
},
"doc_propagation": set(),
# Transformer Plugins (None yet)
}

Expand Down Expand Up @@ -138,6 +139,7 @@ def get_long_description():
"tag_propagation",
"term_propagation",
"snowflake_tag_propagation",
"doc_propagation",
]
for dependency in plugins[plugin]
),
Expand All @@ -158,6 +160,7 @@ def get_long_description():
"tag_propagation",
"term_propagation",
"snowflake_tag_propagation",
"doc_propagation",
]
for dependency in plugins[plugin]
),
Expand All @@ -173,6 +176,7 @@ def get_long_description():
"tag_propagation = datahub_actions.plugin.action.tag.tag_propagation_action:TagPropagationAction",
"term_propagation = datahub_actions.plugin.action.term.term_propagation_action:TermPropagationAction",
"snowflake_tag_propagation = datahub_actions.plugin.action.snowflake.tag_propagator:SnowflakeTagPropagatorAction",
"doc_propagation = datahub_actions.plugin.action.propagation.docs.propagation_action:DocPropagationAction",
],
"datahub_actions.transformer.plugins": [],
"datahub_actions.source.plugins": [],
Expand Down
10 changes: 10 additions & 0 deletions datahub-actions/src/datahub_actions/api/action_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ def get_downstreams(self, entity_urn: str) -> List[str]:
return entities
return []

def get_upstreams(self, entity_urn: str) -> List[str]:
    """
    Return the entities that `entity_urn` points to via OUTGOING
    `DownstreamOf` relationships.

    The URN is percent-encoded and sent to the `/relationships` endpoint of
    the configured GMS server. An empty list is returned when the response
    reports a count of zero.
    """
    encoded_urn = urllib.parse.quote(entity_urn)
    query = (
        "/relationships?direction=OUTGOING&types=List(DownstreamOf)"
        f"&urn={encoded_urn}"
    )
    payload = self.graph._get_generic(f"{self.graph._gms_server}{query}")
    if payload["count"] > 0:
        return [relationship["entity"] for relationship in payload["relationships"]]
    return []

def get_relationships(
self, entity_urn: str, direction: str, relationship_types: List[str]
) -> List[str]:
Expand Down
56 changes: 56 additions & 0 deletions datahub-actions/src/datahub_actions/plugin/action/mcl_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2021 Acryl Data, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Callable

from datahub.metadata.schema_classes import MetadataChangeLogClass

from datahub_actions.event.event_envelope import EventEnvelope
from datahub_actions.event.event_registry import METADATA_CHANGE_LOG_EVENT_V1_TYPE


class MCLProcessor:
    """
    A utility class to register and process MetadataChangeLog events.

    Processors are stored per entity type and aspect name; registering a
    second processor for the same pair replaces the first.
    """

    def __init__(self) -> None:
        # entity type -> aspect name -> processor callable
        self.entity_aspect_processors: dict[str, dict[str, Callable]] = {}

    def is_mcl(self, event: EventEnvelope) -> bool:
        """Return True when the envelope's event type is the MetadataChangeLog v1 type."""
        # Compare by value, not identity: `is` only succeeds when both sides
        # are the very same object, which equal values are not guaranteed to be.
        return event.event_type == METADATA_CHANGE_LOG_EVENT_V1_TYPE

    def register_processor(
        self, entity_type: str, aspect: str, processor: Callable
    ) -> None:
        """
        Register `processor` for the given entity type and aspect name.

        The processor is later invoked with the keyword arguments
        `entity_urn`, `aspect_name`, `aspect_value` and
        `previous_aspect_value`.
        """
        self.entity_aspect_processors.setdefault(entity_type, {})[aspect] = processor

    def process(self, event: EventEnvelope) -> Any:
        """
        Dispatch a MetadataChangeLog event to its registered processor.

        Returns the processor's result, or None when the wrapped event is not
        a MetadataChangeLogClass or no processor is registered for its entity
        type and aspect.
        """
        mcl = event.event
        if not isinstance(mcl, MetadataChangeLogClass):
            return None

        processors = self.entity_aspect_processors.get(mcl.entityType)
        if processors is None or mcl.aspectName not in processors:
            return None

        return processors[mcl.aspectName](
            entity_urn=mcl.entityUrn,
            aspect_name=mcl.aspectName,
            aspect_value=mcl.aspect,
            previous_aspect_value=mcl.previousAspectValue,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2021 Acryl Data, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2021 Acryl Data, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit 36aa93e

Please sign in to comment.