diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index d8a9cd7bfd6a3..ae9a0b1605cdf 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -30,11 +30,11 @@ jobs: DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.9", "3.11"] include: - python-version: "3.9" extraPythonRequirement: "dagster>=1.3.3" - - python-version: "3.10" + - python-version: "3.11" extraPythonRequirement: "dagster>=1.3.3" fail-fast: false steps: @@ -57,7 +57,7 @@ jobs: if: always() run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v4 - if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} + if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} with: name: Test Results (dagster Plugin ${{ matrix.python-version}}) path: | diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index ad00c6d1551d1..106cba1473982 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -33,7 +33,7 @@ jobs: # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }} strategy: matrix: - python-version: ["3.8", "3.10"] + python-version: ["3.8", "3.11"] command: [ "testQuick", @@ -43,7 +43,7 @@ jobs: ] include: - python-version: "3.8" - - python-version: "3.10" + - python-version: "3.11" fail-fast: false steps: - name: Free up disk space diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index e4a70426f3a61..d77142a1f00de 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -30,7 +30,7 @@ jobs: DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] fail-fast: false steps: - name: Set up JDK 17 @@ -52,7 +52,7 @@ jobs: if: always() run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v4 - if: ${{ always() && matrix.python-version == '3.10'}} + if: ${{ always() && matrix.python-version == '3.11'}} with: name: Test Results (Prefect Plugin ${{ matrix.python-version}}) path: | diff --git a/.github/workflows/qodana-scan.yml b/.github/workflows/qodana-scan.yml deleted file mode 100644 index 750cf24ad38e5..0000000000000 --- a/.github/workflows/qodana-scan.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Qodana -on: - workflow_dispatch: - pull_request: - push: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - qodana: - runs-on: ubuntu-latest - steps: - - uses: acryldata/sane-checkout-action@v3 - - name: "Qodana Scan" - uses: JetBrains/qodana-action@v2022.3.4 - - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: ${{ runner.temp }}/qodana/results/qodana.sarif.json - cache-default-branch-only: true diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 31261ea3d940d..855e75c69613f 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -109,7 +109,7 @@ div[class^="announcementBar"] { div { display: flex; align-items: center; - padding: 0 1rem; + padding: 0 .5rem; justify-content: space-between; font-size: 1rem; @@ -156,7 +156,10 @@ div[class^="announcementBar"] { a { color: #EFB300; text-decoration: none; - font-size: 1rem + font-size: 1rem; + padding-right: 0; + padding-left: 12px; + min-width: 108px; } } } diff --git a/docs-website/static/img/solutions/observe-tile-7.png b/docs-website/static/img/solutions/observe-tile-7.png index 2b882d54857ba..00c1ded63c967 100644 Binary files a/docs-website/static/img/solutions/observe-tile-7.png and b/docs-website/static/img/solutions/observe-tile-7.png differ diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 3209233184d55..2693aab0700da 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -148,10 +148,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/dagster-plugin/README.md b/metadata-ingestion-modules/dagster-plugin/README.md index 8e1460957ed9f..5113fc37dcc22 100644 --- a/metadata-ingestion-modules/dagster-plugin/README.md +++ b/metadata-ingestion-modules/dagster-plugin/README.md @@ -1,4 +1,3 @@ # Datahub Dagster Plugin -See the DataHub Dagster docs for details. - +See the [DataHub Dagster docs](https://datahubproject.io/docs/lineage/dagster/) for details. diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 0e0685cb378c1..22c15497bd807 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -107,9 +107,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/gx-plugin/README.md b/metadata-ingestion-modules/gx-plugin/README.md index 1ffd87a955432..9d50235a093d6 100644 --- a/metadata-ingestion-modules/gx-plugin/README.md +++ b/metadata-ingestion-modules/gx-plugin/README.md @@ -1,4 +1,3 @@ # Datahub GX Plugin -See the DataHub GX docs for details. - +See the [DataHub GX docs](https://datahubproject.io/docs/metadata-ingestion/integration_docs/great-expectations) for details. diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py index 73d5d1a9a02f1..40afc81a98f9c 100644 --- a/metadata-ingestion-modules/gx-plugin/setup.py +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -118,9 +118,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/prefect-plugin/README.md b/metadata-ingestion-modules/prefect-plugin/README.md index 0896942e78ef6..f21e00b494513 100644 --- a/metadata-ingestion-modules/prefect-plugin/README.md +++ b/metadata-ingestion-modules/prefect-plugin/README.md @@ -28,7 +28,7 @@ The `prefect-datahub` collection allows you to easily integrate DataHub's metada ## Prerequisites -- Python 3.7+ +- Python 3.8+ - Prefect 2.0.0+ and < 3.0.0+ - A running instance of DataHub diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 7e56fe8b6ad11..70b0e95819564 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -103,10 +103,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", @@ -120,7 +116,7 @@ def get_long_description(): ], # Package info. zip_safe=False, - python_requires=">=3.7", + python_requires=">=3.8", package_dir={"": "src"}, packages=setuptools.find_namespace_packages(where="./src"), entry_points=entry_points, diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index e5792da32fb5d..2841985ad0780 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -154,7 +154,6 @@ def merge_schemas(schemas_obj: List[dict]) -> str: # Patch add_name method to NOT complain about duplicate names. class NamesWithDups(avro.schema.Names): def add_name(self, name_attr, space_attr, new_schema): - to_add = avro.schema.Name(name_attr, space_attr, self.default_namespace) assert to_add.name assert to_add.space @@ -626,7 +625,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str: class {class_name}(_SpecificUrn): ENTITY_TYPE: ClassVar[str] = "{entity_type}" - URN_PARTS: ClassVar[int] = {arg_count} + _URN_PARTS: ClassVar[int] = {arg_count} def __init__(self, {init_args}, *, _allow_coercion: bool = True) -> None: if _allow_coercion: @@ -640,8 +639,8 @@ def __init__(self, {init_args}, *, _allow_coercion: bool = True) -> None: @classmethod def _parse_ids(cls, entity_ids: List[str]) -> "{class_name}": - if len(entity_ids) != cls.URN_PARTS: - raise InvalidUrnError(f"{class_name} should have {{cls.URN_PARTS}} parts, got {{len(entity_ids)}}: {{entity_ids}}") + if len(entity_ids) != cls._URN_PARTS: + raise InvalidUrnError(f"{class_name} should have {{cls._URN_PARTS}} parts, got {{len(entity_ids)}}: {{entity_ids}}") return cls({parse_ids_mapping}, _allow_coercion=False) @classmethod diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c6994dd6d5aa6..8357262537bcf 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -15,7 +15,7 @@ base_requirements = { # Our min version of typing_extensions is somewhat constrained by Airflow. - "typing_extensions>=3.10.0.2", + "typing_extensions>=4.2.0", # Actual dependencies. "typing-inspect", # pydantic 1.8.2 is incompatible with mypy 0.910. @@ -298,8 +298,8 @@ } data_lake_profiling = { - "pydeequ~=1.1.0", - "pyspark~=3.3.0", + "pydeequ>=1.1.0", + "pyspark~=3.5.0", } delta_lake = { @@ -318,7 +318,7 @@ # 0.1.11 appears to have authentication issues with azure databricks # 0.22.0 has support for `include_browse` in metadata list apis "databricks-sdk>=0.30.0", - "pyspark~=3.3.0", + "pyspark~=3.5.0", "requests", # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes # Version 3.0.0 required SQLAlchemy > 2.0.21 @@ -874,9 +874,6 @@ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", @@ -917,6 +914,7 @@ "sync-file-emitter", "sql-parser", "iceberg", + "feast", } else set() ) diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py index 9d2a65663ba37..283cdaa833333 100644 --- a/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py @@ -1,6 +1,6 @@ import logging from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Optional from pydantic import Field @@ -10,6 +10,7 @@ CircuitBreakerConfig, ) from datahub.api.graphql import Assertion, Operation +from datahub.emitter.mce_builder import parse_ts_millis logger: logging.Logger = logging.getLogger(__name__) @@ -49,7 +50,7 @@ def get_last_updated(self, urn: str) -> Optional[datetime]: if not operations: return None else: - return datetime.fromtimestamp(operations[0]["lastUpdatedTimestamp"] / 1000) + return parse_ts_millis(operations[0]["lastUpdatedTimestamp"]) def _check_if_assertion_failed( self, assertions: List[Dict[str, Any]], last_updated: Optional[datetime] = None @@ -93,7 +94,7 @@ class AssertionResult: logger.info(f"Found successful assertion: {assertion_urn}") result = False if last_updated is not None: - last_run = datetime.fromtimestamp(last_assertion.time / 1000) + last_run = parse_ts_millis(last_assertion.time) if last_updated > last_run: logger.error( f"Missing assertion run for {assertion_urn}. The dataset was updated on {last_updated} but the last assertion run was at {last_run}" @@ -117,7 +118,7 @@ def is_circuit_breaker_active(self, urn: str) -> bool: ) if not last_updated: - last_updated = datetime.now() - self.config.time_delta + last_updated = datetime.now(tz=timezone.utc) - self.config.time_delta logger.info( f"Dataset {urn} doesn't have last updated or check_last_assertion_time is false, using calculated min assertion date {last_updated}" ) diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 7df007e087979..08817d9d5fdb9 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -10,7 +10,6 @@ List, Optional, Type, - TypeVar, Union, runtime_checkable, ) @@ -19,14 +18,12 @@ from cached_property import cached_property from pydantic import BaseModel, Extra, ValidationError from pydantic.fields import Field -from typing_extensions import Protocol +from typing_extensions import Protocol, Self from datahub.configuration._config_enum import ConfigEnum as ConfigEnum # noqa: I250 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.utilities.dedup_list import deduplicate_list -_ConfigSelf = TypeVar("_ConfigSelf", bound="ConfigModel") - REDACT_KEYS = { "password", "token", @@ -109,7 +106,7 @@ def _schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None: schema_extra = _schema_extra @classmethod - def parse_obj_allow_extras(cls: Type[_ConfigSelf], obj: Any) -> _ConfigSelf: + def parse_obj_allow_extras(cls, obj: Any) -> Self: if PYDANTIC_VERSION_2: try: with unittest.mock.patch.dict( diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 69946c575908b..110624aa61cb8 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -6,7 +6,7 @@ import os import re import time -from datetime import datetime +from datetime import datetime, timezone from enum import Enum from typing import ( TYPE_CHECKING, @@ -103,6 +103,22 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]: return int(ts.timestamp() * 1000) +@overload +def parse_ts_millis(ts: float) -> datetime: + ... + + +@overload +def parse_ts_millis(ts: None) -> None: + ... + + +def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]: + if ts is None: + return None + return datetime.fromtimestamp(ts / 1000, tz=timezone.utc) + + def make_data_platform_urn(platform: str) -> str: if platform.startswith("urn:li:dataPlatform:"): return platform diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 293157f8a1ed0..c8eb62a2e1de2 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -4,8 +4,8 @@ from pydantic.main import BaseModel from datahub.cli.env_utils import get_boolean_env_variable -from datahub.emitter.enum_helpers import get_enum_options from datahub.emitter.mce_builder import ( + ALL_ENV_TYPES, Aspect, datahub_guid, make_container_urn, @@ -25,7 +25,6 @@ ContainerClass, DomainsClass, EmbedClass, - FabricTypeClass, GlobalTagsClass, MetadataChangeEventClass, OwnerClass, @@ -206,11 +205,7 @@ def gen_containers( # Extra validation on the env field. # In certain cases (mainly for backwards compatibility), the env field will actually # have a platform instance name. - env = ( - container_key.env - if container_key.env in get_enum_options(FabricTypeClass) - else None - ) + env = container_key.env if container_key.env in ALL_ENV_TYPES else None container_urn = container_key.as_urn() diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py index 779b42e1e1ee9..1ed8ce1d5a615 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py @@ -2,7 +2,7 @@ import time from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional, Sequence, Union +from typing import Any, Dict, List, Optional, Sequence, Union from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE from datahub.emitter.serialization_helper import pre_json_transform @@ -75,7 +75,7 @@ def _add_patch( # TODO: Validate that aspectName is a valid aspect for this entityType self.patches[aspect_name].append(_Patch(op, path, value)) - def build(self) -> Iterable[MetadataChangeProposalClass]: + def build(self) -> List[MetadataChangeProposalClass]: return [ MetadataChangeProposalClass( entityUrn=self.urn, diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 675717b5ec482..04242c8bf45d2 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -3,7 +3,7 @@ import logging import os from json.decoder import JSONDecodeError -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union import requests from deprecated import deprecated @@ -288,7 +288,7 @@ def emit_mcp( def emit_mcps( self, - mcps: List[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]], + mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]], async_flag: Optional[bool] = None, ) -> int: logger.debug("Attempting to emit batch mcps") diff --git a/metadata-ingestion/src/datahub/ingestion/api/closeable.py b/metadata-ingestion/src/datahub/ingestion/api/closeable.py index 80a5008ed6368..7b8e1a36162c9 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/closeable.py +++ b/metadata-ingestion/src/datahub/ingestion/api/closeable.py @@ -1,9 +1,9 @@ from abc import abstractmethod from contextlib import AbstractContextManager from types import TracebackType -from typing import Optional, Type, TypeVar +from typing import Optional, Type -_Self = TypeVar("_Self", bound="Closeable") +from typing_extensions import Self class Closeable(AbstractContextManager): @@ -11,7 +11,7 @@ class Closeable(AbstractContextManager): def close(self) -> None: pass - def __enter__(self: _Self) -> _Self: + def __enter__(self) -> Self: # This method is mainly required for type checking. return self diff --git a/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py b/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py index 3680546d307d9..c1a49ce82e6e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +++ b/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py @@ -1,6 +1,8 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Dict, NewType, Optional, Type, TypeVar +from typing import Any, Dict, NewType, Optional + +from typing_extensions import Self import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel @@ -17,9 +19,6 @@ class IngestionCheckpointingProviderConfig(ConfigModel): pass -_Self = TypeVar("_Self", bound="IngestionCheckpointingProviderBase") - - @dataclass() class IngestionCheckpointingProviderBase(StatefulCommittable[CheckpointJobStatesMap]): """ @@ -32,9 +31,7 @@ def __init__(self, name: str, commit_policy: CommitPolicy = CommitPolicy.ALWAYS) @classmethod @abstractmethod - def create( - cls: Type[_Self], config_dict: Dict[str, Any], ctx: PipelineContext - ) -> "_Self": + def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> Self: pass @abstractmethod diff --git a/metadata-ingestion/src/datahub/ingestion/api/report.py b/metadata-ingestion/src/datahub/ingestion/api/report.py index ade2832f1b669..32810189acd00 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/report.py +++ b/metadata-ingestion/src/datahub/ingestion/api/report.py @@ -42,7 +42,10 @@ def to_pure_python_obj(some_val: Any) -> Any: return some_val.as_obj() elif isinstance(some_val, pydantic.BaseModel): return Report.to_pure_python_obj(some_val.dict()) - elif dataclasses.is_dataclass(some_val): + elif dataclasses.is_dataclass(some_val) and not isinstance(some_val, type): + # The `is_dataclass` function returns `True` for both instances and classes. + # We need an extra check to ensure an instance was passed in. + # https://docs.python.org/3/library/dataclasses.html#dataclasses.is_dataclass return dataclasses.asdict(some_val) elif isinstance(some_val, list): return [Report.to_pure_python_obj(v) for v in some_val if v is not None] diff --git a/metadata-ingestion/src/datahub/ingestion/api/sink.py b/metadata-ingestion/src/datahub/ingestion/api/sink.py index 62feb7b5a02e6..655e6bb22fa8d 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/sink.py +++ b/metadata-ingestion/src/datahub/ingestion/api/sink.py @@ -3,6 +3,8 @@ from dataclasses import dataclass, field from typing import Any, Generic, Optional, Type, TypeVar, cast +from typing_extensions import Self + from datahub.configuration.common import ConfigModel from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit @@ -79,7 +81,6 @@ def on_failure( SinkReportType = TypeVar("SinkReportType", bound=SinkReport, covariant=True) SinkConfig = TypeVar("SinkConfig", bound=ConfigModel, covariant=True) -Self = TypeVar("Self", bound="Sink") class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta): @@ -90,7 +91,7 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta): report: SinkReportType @classmethod - def get_config_class(cls: Type[Self]) -> Type[SinkConfig]: + def get_config_class(cls) -> Type[SinkConfig]: config_class = get_class_from_annotation(cls, Sink, ConfigModel) assert config_class, "Sink subclasses must define a config class" return cast(Type[SinkConfig], config_class) @@ -112,7 +113,7 @@ def __post_init__(self) -> None: pass @classmethod - def create(cls: Type[Self], config_dict: dict, ctx: PipelineContext) -> "Self": + def create(cls, config_dict: dict, ctx: PipelineContext) -> "Self": return cls(ctx, cls.get_config_class().parse_obj(config_dict)) def handle_work_unit_start(self, workunit: WorkUnit) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7791ea2797be3..f3e5b1db6a1c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -1,5 +1,4 @@ import logging -from datetime import datetime, timezone from typing import ( TYPE_CHECKING, Dict, @@ -14,7 +13,7 @@ ) from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mce_builder import make_dataplatform_instance_urn +from datahub.emitter.mce_builder import make_dataplatform_instance_urn, parse_ts_millis from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import entity_supports_aspect from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -479,10 +478,7 @@ def auto_empty_dataset_usage_statistics( if invalid_timestamps: logger.warning( f"Usage statistics with unexpected timestamps, bucket_duration={config.bucket_duration}:\n" - ", ".join( - str(datetime.fromtimestamp(ts / 1000, tz=timezone.utc)) - for ts in invalid_timestamps - ) + ", ".join(str(parse_ts_millis(ts)) for ts in invalid_timestamps) ) for bucket in bucket_timestamps: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 3ce34be8dc89d..cbe1f6eb97824 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -1,7 +1,7 @@ import logging from collections import defaultdict from dataclasses import dataclass, field -from datetime import datetime, timezone +from datetime import datetime from functools import lru_cache from typing import Any, Dict, FrozenSet, Iterable, Iterator, List, Optional @@ -15,6 +15,7 @@ TimePartitioningType, ) +from datahub.emitter.mce_builder import parse_ts_millis from datahub.ingestion.api.source import SourceReport from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_helper import parse_labels @@ -393,13 +394,7 @@ def _make_bigquery_table( name=table.table_name, created=table.created, table_type=table.table_type, - last_altered=( - datetime.fromtimestamp( - table.get("last_altered") / 1000, tz=timezone.utc - ) - if table.get("last_altered") is not None - else None - ), + last_altered=parse_ts_millis(table.get("last_altered")), size_in_bytes=table.get("bytes"), rows_count=table.get("row_count"), comment=table.comment, @@ -460,11 +455,7 @@ def _make_bigquery_view(view: bigquery.Row) -> BigqueryView: return BigqueryView( name=view.table_name, created=view.created, - last_altered=( - datetime.fromtimestamp(view.get("last_altered") / 1000, tz=timezone.utc) - if view.get("last_altered") is not None - else None - ), + last_altered=(parse_ts_millis(view.get("last_altered"))), comment=view.comment, view_definition=view.view_definition, materialized=view.table_type == BigqueryTableType.MATERIALIZED_VIEW, @@ -705,13 +696,7 @@ def _make_bigquery_table_snapshot(snapshot: bigquery.Row) -> BigqueryTableSnapsh return BigqueryTableSnapshot( name=snapshot.table_name, created=snapshot.created, - last_altered=( - datetime.fromtimestamp( - snapshot.get("last_altered") / 1000, tz=timezone.utc - ) - if snapshot.get("last_altered") is not None - else None - ), + last_altered=parse_ts_millis(snapshot.get("last_altered")), comment=snapshot.comment, ddl=snapshot.ddl, snapshot_time=snapshot.snapshot_time, diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_kafka_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_kafka_reader.py index 56a3d55abb184..ba073533eccfb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_kafka_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_kafka_reader.py @@ -12,6 +12,7 @@ from confluent_kafka.schema_registry.avro import AvroDeserializer from datahub.configuration.kafka import KafkaConsumerConnectionConfig +from datahub.emitter.mce_builder import parse_ts_millis from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.datahub.config import DataHubSourceConfig @@ -92,7 +93,7 @@ def _poll_partition( if mcl.created and mcl.created.time > stop_time.timestamp() * 1000: logger.info( f"Stopped reading from kafka, reached MCL " - f"with audit stamp {datetime.fromtimestamp(mcl.created.time / 1000)}" + f"with audit stamp {parse_ts_millis(mcl.created.time)}" ) break diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py index 6d16aaab2d798..3f7a1fc453bcd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py @@ -170,6 +170,8 @@ class DataProcessCleanupReport(SourceReport): sample_removed_aspects_by_type: TopKDict[str, LossyList[str]] = field( default_factory=TopKDict ) + num_data_flows_found: int = 0 + num_data_jobs_found: int = 0 class DataProcessCleanup: @@ -265,13 +267,17 @@ def keep_last_n_dpi( self.report.report_failure( f"Exception while deleting DPI: {e}", exc=e ) - if deleted_count_last_n % self.config.batch_size == 0: + if ( + deleted_count_last_n % self.config.batch_size == 0 + and deleted_count_last_n > 0 + ): logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}") if self.config.delay: logger.info(f"Sleeping for {self.config.delay} seconds") time.sleep(self.config.delay) - logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}") + if deleted_count_last_n > 0: + logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}") def delete_entity(self, urn: str, type: str) -> None: assert self.ctx.graph @@ -351,7 +357,10 @@ def remove_old_dpis( except Exception as e: self.report.report_failure(f"Exception while deleting DPI: {e}", exc=e) - if deleted_count_retention % self.config.batch_size == 0: + if ( + deleted_count_retention % self.config.batch_size == 0 + and deleted_count_retention > 0 + ): logger.info( f"Deleted {deleted_count_retention} DPIs from {job.urn} due to retention" ) @@ -393,6 +402,7 @@ def get_data_flows(self) -> Iterable[DataFlowEntity]: scrollAcrossEntities = result.get("scrollAcrossEntities") if not scrollAcrossEntities: raise ValueError("Missing scrollAcrossEntities in response") + self.report.num_data_flows_found += scrollAcrossEntities.get("count") logger.info(f"Got {scrollAcrossEntities.get('count')} DataFlow entities") scroll_id = scrollAcrossEntities.get("nextScrollId") @@ -415,8 +425,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: assert self.ctx.graph dataFlows: Dict[str, DataFlowEntity] = {} - for flow in self.get_data_flows(): - dataFlows[flow.urn] = flow + if self.config.delete_empty_data_flows: + for flow in self.get_data_flows(): + dataFlows[flow.urn] = flow scroll_id: Optional[str] = None previous_scroll_id: Optional[str] = None @@ -443,6 +454,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if not scrollAcrossEntities: raise ValueError("Missing scrollAcrossEntities in response") + self.report.num_data_jobs_found += scrollAcrossEntities.get("count") logger.info(f"Got {scrollAcrossEntities.get('count')} DataJob entities") scroll_id = scrollAcrossEntities.get("nextScrollId") @@ -481,7 +493,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: previous_scroll_id = scroll_id - logger.info(f"Deleted {deleted_jobs} DataJobs") + if deleted_jobs > 0: + logger.info(f"Deleted {deleted_jobs} DataJobs") # Delete empty dataflows if needed if self.config.delete_empty_data_flows: deleted_data_flows: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 3ddf47b70cdf8..ceac9e96d1ddd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -225,7 +225,7 @@ def __init__(self, config: DataLakeSourceConfig, ctx: PipelineContext): self.init_spark() def init_spark(self): - os.environ.setdefault("SPARK_VERSION", "3.3") + os.environ.setdefault("SPARK_VERSION", "3.5") spark_version = os.environ["SPARK_VERSION"] # Importing here to avoid Deequ dependency for non profiling use cases diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index fad54fda45378..6d67ab29b3a3d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -838,3 +838,18 @@ def _process_view( entityUrn=dataset_urn, aspect=view_properties_aspect, ).as_workunit() + + if view_definition and self.config.include_view_lineage: + default_db = None + default_schema = None + try: + default_db, default_schema = self.get_db_schema(dataset_name) + except ValueError: + logger.warning(f"Invalid view identifier: {dataset_name}") + + self.aggregator.add_view_definition( + view_urn=dataset_urn, + view_definition=view_definition, + default_db=default_db, + default_schema=default_schema, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index adb171d4ad54b..60ecbaf38838a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -123,6 +123,10 @@ class HiveMetastore(BasicSQLAlchemyConfig): description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']", ) + include_view_lineage: bool = Field( + default=False, description="", hidden_from_docs=True + ) + include_catalog_name_in_ids: bool = Field( default=False, description="Add the Presto catalog name (e.g. hive) to the generated dataset urns. `urn:li:dataset:(urn:li:dataPlatform:hive,hive.user.logging_events,PROD)` versus `urn:li:dataset:(urn:li:dataPlatform:hive,user.logging_events,PROD)`", @@ -160,6 +164,9 @@ def get_sql_alchemy_url( @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.DATA_PROFILING, "Not Supported", False) @capability(SourceCapability.CLASSIFICATION, "Not Supported", False) +@capability( + SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False +) class HiveMetastoreSource(SQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 9d8b67041998c..a2338f14196d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -724,7 +724,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ): yield from auto_workunit( generate_procedure_lineage( - schema_resolver=self.schema_resolver, + schema_resolver=self.get_schema_resolver(), procedure=procedure, procedure_job_urn=MSSQLDataJob(entity=procedure).urn, is_temp_table=self.is_temp_table, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 4e22930e7a2a0..a0bd9ce0760bd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -11,7 +11,6 @@ Dict, Iterable, List, - MutableMapping, Optional, Set, Tuple, @@ -36,7 +35,6 @@ make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import capability from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage @@ -79,7 +77,6 @@ StatefulIngestionSourceBase, ) from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass -from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.com.linkedin.pegasus2avro.schema import ( @@ -106,17 +103,11 @@ GlobalTagsClass, SubTypesClass, TagAssociationClass, - UpstreamClass, ViewPropertiesClass, ) from datahub.sql_parsing.schema_resolver import SchemaResolver -from datahub.sql_parsing.sqlglot_lineage import ( - SqlParsingResult, - sqlglot_lineage, - view_definition_lineage_helper, -) +from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator from datahub.telemetry import telemetry -from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_type_converter import ( get_native_data_type_for_sqlalchemy_type, @@ -347,17 +338,19 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) ) self.views_failed_parsing: Set[str] = set() - self.schema_resolver: SchemaResolver = SchemaResolver( + + self.discovered_datasets: Set[str] = set() + self.aggregator = SqlParsingAggregator( platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, + graph=self.ctx.graph, + generate_lineage=self.include_lineage, + generate_usage_statistics=False, + generate_operations=False, + eager_graph_load=False, ) - self.discovered_datasets: Set[str] = set() - self._view_definition_cache: MutableMapping[str, str] - if self.config.use_file_backed_cache: - self._view_definition_cache = FileBackedDict[str]() - else: - self._view_definition_cache = {} + self.report.sql_aggregator = self.aggregator.report @classmethod def test_connection(cls, config_dict: dict) -> TestConnectionReport: @@ -572,36 +565,9 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests, profiler, platform=self.platform ) - if self.config.include_view_lineage: - yield from self.get_view_lineage() - - def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: - builder = SqlParsingBuilder( - generate_lineage=True, - generate_usage_statistics=False, - generate_operations=False, - ) - for dataset_name in self._view_definition_cache.keys(): - # TODO: Ensure that the lineage generated from the view definition - # matches the dataset_name. - view_definition = self._view_definition_cache[dataset_name] - result = self._run_sql_parser( - dataset_name, - view_definition, - self.schema_resolver, - ) - if result and result.out_tables: - # This does not yield any workunits but we use - # yield here to execute this method - yield from builder.process_sql_parsing_result( - result=result, - query=view_definition, - is_view_ddl=True, - include_column_lineage=self.config.include_view_column_lineage, - ) - else: - self.views_failed_parsing.add(dataset_name) - yield from builder.gen_workunits() + # Generate workunit for aggregated SQL parsing results + for mcp in self.aggregator.gen_metadata(): + yield mcp.as_workunit() def get_identifier( self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any @@ -760,16 +726,6 @@ def _process_table( ) dataset_snapshot.aspects.append(dataset_properties) - if self.config.include_table_location_lineage and location_urn: - external_upstream_table = UpstreamClass( - dataset=location_urn, - type=DatasetLineageTypeClass.COPY, - ) - yield MetadataChangeProposalWrapper( - entityUrn=dataset_snapshot.urn, - aspect=UpstreamLineage(upstreams=[external_upstream_table]), - ).as_workunit() - extra_tags = self.get_extra_tags(inspector, schema, table) pk_constraints: dict = inspector.get_pk_constraint(table, schema) partitions: Optional[List[str]] = self.get_partitions(inspector, schema, table) @@ -795,7 +751,7 @@ def _process_table( dataset_snapshot.aspects.append(schema_metadata) if self._save_schema_to_resolver(): - self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.aggregator.register_schema(dataset_urn, schema_metadata) self.discovered_datasets.add(dataset_name) db_name = self.get_db_name(inspector) @@ -815,6 +771,13 @@ def _process_table( ), ) + if self.config.include_table_location_lineage and location_urn: + self.aggregator.add_known_lineage_mapping( + upstream_urn=location_urn, + downstream_urn=dataset_snapshot.urn, + lineage_type=DatasetLineageTypeClass.COPY, + ) + if self.config.domain: assert self.domain_registry yield from get_domain_wu( @@ -1089,6 +1052,7 @@ def _process_view( self.config.platform_instance, self.config.env, ) + try: columns = inspector.get_columns(view, schema) except KeyError: @@ -1108,7 +1072,7 @@ def _process_view( canonical_schema=schema_fields, ) if self._save_schema_to_resolver(): - self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.aggregator.register_schema(dataset_urn, schema_metadata) self.discovered_datasets.add(dataset_name) description, properties, _ = self.get_table_properties(inspector, schema, view) @@ -1117,7 +1081,18 @@ def _process_view( view_definition = self._get_view_definition(inspector, schema, view) properties["view_definition"] = view_definition if view_definition and self.config.include_view_lineage: - self._view_definition_cache[dataset_name] = view_definition + default_db = None + default_schema = None + try: + default_db, default_schema = self.get_db_schema(dataset_name) + except ValueError: + logger.warning(f"Invalid view identifier: {dataset_name}") + self.aggregator.add_view_definition( + view_urn=dataset_urn, + view_definition=view_definition, + default_db=default_db, + default_schema=default_schema, + ) dataset_snapshot = DatasetSnapshot( urn=dataset_urn, @@ -1169,48 +1144,9 @@ def _save_schema_to_resolver(self): hasattr(self.config, "include_lineage") and self.config.include_lineage ) - def _run_sql_parser( - self, view_identifier: str, query: str, schema_resolver: SchemaResolver - ) -> Optional[SqlParsingResult]: - try: - database, schema = self.get_db_schema(view_identifier) - except ValueError: - logger.warning(f"Invalid view identifier: {view_identifier}") - return None - raw_lineage = sqlglot_lineage( - query, - schema_resolver=schema_resolver, - default_db=database, - default_schema=schema, - ) - view_urn = make_dataset_urn_with_platform_instance( - self.platform, - view_identifier, - self.config.platform_instance, - self.config.env, - ) - - if raw_lineage.debug_info.table_error: - logger.debug( - f"Failed to parse lineage for view {view_identifier}: " - f"{raw_lineage.debug_info.table_error}" - ) - self.report.num_view_definitions_failed_parsing += 1 - self.report.view_definitions_parsing_failures.append( - f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}" - ) - return None - - elif raw_lineage.debug_info.column_error: - self.report.num_view_definitions_failed_column_parsing += 1 - self.report.view_definitions_parsing_failures.append( - f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}" - ) - else: - self.report.num_view_definitions_parsed += 1 - if raw_lineage.out_tables != [view_urn]: - self.report.num_view_definitions_view_urn_mismatch += 1 - return view_definition_lineage_helper(raw_lineage, view_urn) + @property + def include_lineage(self): + return self.config.include_view_lineage def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: database, schema, _view = dataset_identifier.split(".", 2) @@ -1411,5 +1347,8 @@ def prepare_profiler_args( schema=schema, table=table, partition=partition, custom_sql=custom_sql ) + def get_schema_resolver(self) -> SchemaResolver: + return self.aggregator._schema_resolver + def get_report(self): return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py index bd6c23cc2d464..c91be9b494c00 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -7,7 +7,10 @@ from sqlalchemy import create_engine, inspect from sqlalchemy.engine.reflection import Inspector -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mce_builder import ( + make_dataset_urn_with_platform_instance, + parse_ts_millis, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.ge_data_profiler import ( @@ -245,11 +248,7 @@ def is_dataset_eligible_for_profiling( # If profiling state exists we have to carry over to the new state self.state_handler.add_to_state(dataset_urn, last_profiled) - threshold_time: Optional[datetime] = ( - datetime.fromtimestamp(last_profiled / 1000, timezone.utc) - if last_profiled - else None - ) + threshold_time: Optional[datetime] = parse_ts_millis(last_profiled) if ( not threshold_time and self.config.profiling.profile_if_updated_since_days is not None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py index c445ce44a9144..785972b88a49d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py @@ -5,6 +5,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, ) +from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.lossy_collections import LossyList from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport from datahub.utilities.stats_collections import TopKDict, int_top_k_dict @@ -52,6 +53,7 @@ class SQLSourceReport( num_view_definitions_failed_parsing: int = 0 num_view_definitions_failed_column_parsing: int = 0 view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) + sql_aggregator: Optional[SqlAggregatorReport] = None def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/checkpoint.py b/metadata-ingestion/src/datahub/ingestion/source/state/checkpoint.py index 5bfd48eb754d5..2c7a4a8b6c137 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/checkpoint.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/checkpoint.py @@ -12,6 +12,7 @@ import pydantic from datahub.configuration.common import ConfigModel +from datahub.emitter.mce_builder import parse_ts_millis from datahub.metadata.schema_classes import ( DatahubIngestionCheckpointClass, IngestionCheckpointStateClass, @@ -144,7 +145,7 @@ def create_from_checkpoint_aspect( ) logger.info( f"Successfully constructed last checkpoint state for job {job_name} " - f"with timestamp {datetime.fromtimestamp(checkpoint_aspect.timestampMillis/1000, tz=timezone.utc)}" + f"with timestamp {parse_ts_millis(checkpoint_aspect.timestampMillis)}" ) return checkpoint return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index df59cae3fad23..d47e10c9eb5c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -920,10 +920,7 @@ def dataset_browse_prefix(self) -> str: return f"/{self.config.env.lower()}{self.no_env_browse_prefix}" def _re_authenticate(self) -> None: - self.report.info( - message="Re-authenticating to Tableau", - context=f"site='{self.site_content_url}'", - ) + logger.info(f"Re-authenticating to Tableau site '{self.site_content_url}'") # Sign-in again may not be enough because Tableau sometimes caches invalid sessions # so we need to recreate the Tableau Server object self.server = self.config.make_tableau_client(self.site_content_url) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 11827bace4b5a..9b96953794dcd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -4,7 +4,7 @@ import dataclasses import logging -from datetime import datetime, timezone +from datetime import datetime from typing import Any, Dict, Iterable, List, Optional, Union, cast from unittest.mock import patch @@ -27,6 +27,7 @@ from databricks.sdk.service.workspace import ObjectType import datahub +from datahub.emitter.mce_builder import parse_ts_millis from datahub.ingestion.source.unity.hive_metastore_proxy import HiveMetastoreProxy from datahub.ingestion.source.unity.proxy_profiling import ( UnityCatalogProxyProfilingMixin, @@ -211,16 +212,8 @@ def workspace_notebooks(self) -> Iterable[Notebook]: id=obj.object_id, path=obj.path, language=obj.language, - created_at=( - datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc) - if obj.created_at - else None - ), - modified_at=( - datetime.fromtimestamp(obj.modified_at / 1000, tz=timezone.utc) - if obj.modified_at - else None - ), + created_at=parse_ts_millis(obj.created_at), + modified_at=parse_ts_millis(obj.modified_at), ) def query_history( @@ -452,17 +445,9 @@ def _create_table( properties=obj.properties or {}, owner=obj.owner, generation=obj.generation, - created_at=( - datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc) - if obj.created_at - else None - ), + created_at=(parse_ts_millis(obj.created_at) if obj.created_at else None), created_by=obj.created_by, - updated_at=( - datetime.fromtimestamp(obj.updated_at / 1000, tz=timezone.utc) - if obj.updated_at - else None - ), + updated_at=(parse_ts_millis(obj.updated_at) if obj.updated_at else None), updated_by=obj.updated_by, table_id=obj.table_id, comment=obj.comment, @@ -500,12 +485,8 @@ def _create_query(self, info: QueryInfo) -> Optional[Query]: query_id=info.query_id, query_text=info.query_text, statement_type=info.statement_type, - start_time=datetime.fromtimestamp( - info.query_start_time_ms / 1000, tz=timezone.utc - ), - end_time=datetime.fromtimestamp( - info.query_end_time_ms / 1000, tz=timezone.utc - ), + start_time=parse_ts_millis(info.query_start_time_ms), + end_time=parse_ts_millis(info.query_end_time_ms), user_id=info.user_id, user_name=info.user_name, executed_as_user_id=info.executed_as_user_id, diff --git a/metadata-ingestion/src/datahub/utilities/time.py b/metadata-ingestion/src/datahub/utilities/time.py index 0df7afb19935f..e8338ce068c84 100644 --- a/metadata-ingestion/src/datahub/utilities/time.py +++ b/metadata-ingestion/src/datahub/utilities/time.py @@ -1,6 +1,8 @@ import time from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import datetime + +from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis def get_current_time_in_seconds() -> int: @@ -9,12 +11,15 @@ def get_current_time_in_seconds() -> int: def ts_millis_to_datetime(ts_millis: int) -> datetime: """Converts input timestamp in milliseconds to a datetime object with UTC timezone""" - return datetime.fromtimestamp(ts_millis / 1000, tz=timezone.utc) + return parse_ts_millis(ts_millis) def datetime_to_ts_millis(dt: datetime) -> int: """Converts a datetime object to timestamp in milliseconds""" - return int(round(dt.timestamp() * 1000)) + # TODO: Deprecate these helpers in favor of make_ts_millis and parse_ts_millis. + # The other ones support None with a typing overload. + # Also possibly move those helpers to this file. + return make_ts_millis(dt) @dataclass diff --git a/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py b/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py index 7dadd16fb7f1c..7996fe0d7b89b 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py +++ b/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py @@ -1,9 +1,10 @@ import functools import urllib.parse from abc import abstractmethod -from typing import ClassVar, Dict, List, Optional, Type, TypeVar +from typing import ClassVar, Dict, List, Optional, Type from deprecated import deprecated +from typing_extensions import Self from datahub.utilities.urns.error import InvalidUrnError @@ -42,9 +43,6 @@ def _split_entity_id(entity_id: str) -> List[str]: return parts -_UrnSelf = TypeVar("_UrnSelf", bound="Urn") - - @functools.total_ordering class Urn: """ @@ -88,7 +86,7 @@ def entity_ids(self) -> List[str]: return self._entity_ids @classmethod - def from_string(cls: Type[_UrnSelf], urn_str: str) -> "_UrnSelf": + def from_string(cls, urn_str: str) -> Self: """ Creates an Urn from its string representation. @@ -174,7 +172,7 @@ def __hash__(self) -> int: @classmethod @deprecated(reason="prefer .from_string") - def create_from_string(cls: Type[_UrnSelf], urn_str: str) -> "_UrnSelf": + def create_from_string(cls, urn_str: str) -> Self: return cls.from_string(urn_str) @deprecated(reason="prefer .entity_ids") @@ -270,5 +268,5 @@ def underlying_key_aspect_type(cls) -> Type: @classmethod @abstractmethod - def _parse_ids(cls: Type[_UrnSelf], entity_ids: List[str]) -> _UrnSelf: + def _parse_ids(cls, entity_ids: List[str]) -> Self: raise NotImplementedError() diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index dc8c400b29157..fb25531e68526 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -2658,7 +2658,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -2930,7 +2930,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3180,7 +3180,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3430,7 +3430,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3680,7 +3680,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 60f5bf4fbca9a..69c4b9cce0b17 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -3024,7 +3024,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3296,7 +3296,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3546,7 +3546,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3796,7 +3796,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -4046,7 +4046,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index 42a416473ae24..0361e899b5b39 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -564,7 +564,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -636,7 +636,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -657,7 +657,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1019,7 +1019,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1095,7 +1095,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1116,7 +1116,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1347,7 +1347,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1418,7 +1418,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1439,7 +1439,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1871,7 +1871,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1942,7 +1942,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1963,7 +1963,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -3140,7 +3140,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3341,7 +3341,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3523,7 +3523,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3705,7 +3705,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3887,7 +3887,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json index c281ea3eed0fa..c59620f010343 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json @@ -564,7 +564,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -636,7 +636,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -657,7 +657,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1019,7 +1019,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1095,7 +1095,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1116,7 +1116,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1347,7 +1347,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1418,7 +1418,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1439,7 +1439,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1871,7 +1871,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1663355198240, + "time": 1663355198239, "actor": "urn:li:corpuser:datahub" } } @@ -1942,7 +1942,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198240, + "timestampMillis": 1663355198239, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1963,7 +1963,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1663355198242, + "timestampMillis": 1663355198241, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -3504,7 +3504,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3773,7 +3773,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -4023,7 +4023,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -4273,7 +4273,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -4523,7 +4523,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index 495fa32569f56..23b5525b712d0 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -2598,7 +2598,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -2867,7 +2867,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3117,7 +3117,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3367,7 +3367,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3617,7 +3617,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index 20b7cf4a1c26c..da22458f5624c 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -2610,7 +2610,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -2880,7 +2880,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3131,7 +3131,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3382,7 +3382,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3633,7 +3633,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index 80ca85a5e6c61..0b44fe77cd62a 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -2599,7 +2599,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -2868,7 +2868,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3118,7 +3118,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3368,7 +3368,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3618,7 +3618,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index 1e6e4d8ba94a2..3174847dd7e7a 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -2599,7 +2599,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1580505371997, + "time": 1580505371996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -2868,7 +2868,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1582319845997, + "time": 1582319845996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3118,7 +3118,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1584998318997, + "time": 1584998318996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3368,7 +3368,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1588287228997, + "time": 1588287228996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", @@ -3618,7 +3618,7 @@ "actor": "urn:li:corpuser:unknown" }, "lastModified": { - "time": 1589460269997, + "time": 1589460269996, "actor": "urn:li:corpuser:dbt_executor" }, "hash": "", diff --git a/metadata-ingestion/tests/integration/feast/test_feast_repository.py b/metadata-ingestion/tests/integration/feast/test_feast_repository.py index 7f04337145dc3..80d7c6311a958 100644 --- a/metadata-ingestion/tests/integration/feast/test_feast_repository.py +++ b/metadata-ingestion/tests/integration/feast/test_feast_repository.py @@ -1,3 +1,6 @@ +import sys + +import pytest from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline @@ -6,6 +9,11 @@ FROZEN_TIME = "2020-04-14 07:00:00" +# The test is skipped for python 3.11 due to conflicting dependencies in installDev +# setup that requires pydantic < 2 for majority plugins. Note that the test works with +# python 3.11 if run with standalone virtual env setup with feast plugin alone using +# `pip install acryl-datahub[feast]` since it allows pydantic > 2 +@pytest.mark.skipif(sys.version_info > (3, 11), reason="Skipped on Python 3.11+") @freeze_time(FROZEN_TIME) def test_feast_repository_ingest(pytestconfig, tmp_path, mock_time): test_resources_dir = pytestconfig.rootpath / "tests/integration/feast" diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json index 3ba795a5d044a..8d2e29078880d 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "hive-metastore-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -238,7 +238,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -268,7 +268,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -428,7 +428,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -463,7 +463,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -672,10 +672,15 @@ "path": "/name", "value": "nested_struct_test" }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258695" + "value": "1735298453" }, { "op": "add", @@ -697,11 +702,6 @@ "path": "/customProperties/numRows", "value": "0" }, - { - "op": "add", - "path": "/customProperties/totalSize", - "value": "0" - }, { "op": "add", "path": "/customProperties/table_type", @@ -715,7 +715,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -926,11 +926,6 @@ "path": "/customProperties/another.comment", "value": "This table has no partitions" }, - { - "op": "add", - "path": "/customProperties/numFiles", - "value": "1" - }, { "op": "add", "path": "/customProperties/numRows", @@ -943,13 +938,18 @@ }, { "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258689" + "path": "/customProperties/totalSize", + "value": "33" }, { "op": "add", - "path": "/customProperties/totalSize", - "value": "33" + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298448" }, { "op": "add", @@ -974,7 +974,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1164,6 +1164,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298442" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1174,11 +1179,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1202,7 +1202,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1386,6 +1386,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298441" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1396,11 +1401,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1424,7 +1424,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1576,7 +1576,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258672" + "value": "1735298433" }, { "op": "add", @@ -1591,7 +1591,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" }, { "op": "add", @@ -1637,31 +1637,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json index a9bf2cb26da49..f408c6c064848 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "hive-metastore-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -238,7 +238,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -268,7 +268,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -428,7 +428,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -463,7 +463,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -672,10 +672,15 @@ "path": "/name", "value": "nested_struct_test" }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258695" + "value": "1735298453" }, { "op": "add", @@ -697,11 +702,6 @@ "path": "/customProperties/numRows", "value": "0" }, - { - "op": "add", - "path": "/customProperties/totalSize", - "value": "0" - }, { "op": "add", "path": "/customProperties/table_type", @@ -715,7 +715,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -926,11 +926,6 @@ "path": "/customProperties/another.comment", "value": "This table has no partitions" }, - { - "op": "add", - "path": "/customProperties/numFiles", - "value": "1" - }, { "op": "add", "path": "/customProperties/numRows", @@ -943,13 +938,18 @@ }, { "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258689" + "path": "/customProperties/totalSize", + "value": "33" }, { "op": "add", - "path": "/customProperties/totalSize", - "value": "33" + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298448" }, { "op": "add", @@ -974,7 +974,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1164,6 +1164,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298442" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1174,11 +1179,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1202,7 +1202,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1386,6 +1386,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298441" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1396,11 +1401,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1424,7 +1424,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1576,7 +1576,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258672" + "value": "1735298433" }, { "op": "add", @@ -1591,7 +1591,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" }, { "op": "add", @@ -1637,31 +1637,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json index 1937550e1bcbd..7604a96aef825 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "hive-metastore-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", @@ -238,7 +238,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -268,7 +268,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -428,7 +428,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258696" + "value": "1735298453" }, { "op": "add", @@ -463,7 +463,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -672,10 +672,15 @@ "path": "/name", "value": "nested_struct_test" }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258695" + "value": "1735298453" }, { "op": "add", @@ -697,11 +702,6 @@ "path": "/customProperties/numRows", "value": "0" }, - { - "op": "add", - "path": "/customProperties/totalSize", - "value": "0" - }, { "op": "add", "path": "/customProperties/table_type", @@ -715,7 +715,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -926,11 +926,6 @@ "path": "/customProperties/another.comment", "value": "This table has no partitions" }, - { - "op": "add", - "path": "/customProperties/numFiles", - "value": "1" - }, { "op": "add", "path": "/customProperties/numRows", @@ -943,13 +938,18 @@ }, { "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258689" + "path": "/customProperties/totalSize", + "value": "33" }, { "op": "add", - "path": "/customProperties/totalSize", - "value": "33" + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298448" }, { "op": "add", @@ -974,7 +974,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1164,6 +1164,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298442" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1174,11 +1179,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1202,7 +1202,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1386,6 +1386,11 @@ "path": "/customProperties/numRows", "value": "0" }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1735298441" + }, { "op": "add", "path": "/customProperties/numFiles", @@ -1396,11 +1401,6 @@ "path": "/customProperties/COLUMN_STATS_ACCURATE", "value": "{\"BASIC_STATS\":\"true\"}" }, - { - "op": "add", - "path": "/customProperties/transient_lastDdlTime", - "value": "1715258680" - }, { "op": "add", "path": "/customProperties/rawDataSize", @@ -1424,7 +1424,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" } ] }, @@ -1576,7 +1576,7 @@ { "op": "add", "path": "/customProperties/transient_lastDdlTime", - "value": "1715258672" + "value": "1735298433" }, { "op": "add", @@ -1591,7 +1591,7 @@ { "op": "add", "path": "/customProperties/create_date", - "value": "2024-05-09" + "value": "2024-12-27" }, { "op": "add", @@ -1637,31 +1637,31 @@ }, "fields": [ { - "fieldPath": "baz", + "fieldPath": "foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { - "fieldPath": "foo", + "fieldPath": "baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { "fieldPath": "bar", diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json index b3922f76d7b0c..a7716f7e10e55 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json @@ -118,7 +118,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -128,7 +128,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166683", + "Table Parameters: transient_lastDdlTime": "1735218716", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -268,7 +268,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", @@ -280,7 +280,7 @@ "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1724166687", + "Table Parameters: transient_lastDdlTime": "1735218720", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -458,11 +458,11 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Table Type:": "VIRTUAL_VIEW", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "null", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -608,6 +608,187 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1735218723", + "SerDe Library:": "null", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test_view", + "View Expanded Text:": "select `array_struct_test_view`.`property_id`, `array_struct_test_view`.`service` from `db1`.`array_struct_test_view`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view_2", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view_2", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", @@ -639,7 +820,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:31 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:04 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -649,7 +830,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166691", + "Table Parameters: transient_lastDdlTime": "1735218724", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -793,7 +974,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -803,7 +984,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -996,7 +1177,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:20 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:53 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", @@ -1006,7 +1187,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1724166680", + "Table Parameters: transient_lastDdlTime": "1735218713", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1158,7 +1339,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -1168,7 +1349,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166683", + "Table Parameters: transient_lastDdlTime": "1735218716", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1339,14 +1520,14 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:02 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", "Table Type:": "MATERIALIZED_VIEW", "Table Parameters: numFiles": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218722", "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", @@ -1519,7 +1700,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1529,7 +1710,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", @@ -1756,6 +1937,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view_2` AS select `array_struct_test_view`.`property_id`, `array_struct_test_view`.`service` from `db1`.`array_struct_test_view`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:8cc876554899e33efe67c389aaf29c4b", @@ -1875,7 +2074,7 @@ "customProperties": { "Database:": "db2", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:22 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:55 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db2.db/pokes", @@ -1884,7 +2083,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1724166683", + "Table Parameters: transient_lastDdlTime": "1735218716", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -2080,5 +2279,307 @@ "runId": "hive-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `db1.array_struct_test_view` AS\nSELECT\n `array_struct_test`.`property_id`,\n `array_struct_test`.`service`\nFROM `db1`.`array_struct_test`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),property_id)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),service)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `db1.array_struct_test_view_2` AS\nSELECT\n `array_struct_test_view`.`property_id`,\n `array_struct_test_view`.`service`\nFROM `db1`.`array_struct_test_view`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),service)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json index 4a0a4886d606a..d24226e3f4544 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json @@ -118,7 +118,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -128,7 +128,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166683", + "Table Parameters: transient_lastDdlTime": "1735218716", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -268,7 +268,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", @@ -280,7 +280,7 @@ "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1724166687", + "Table Parameters: transient_lastDdlTime": "1735218720", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -458,11 +458,11 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Table Type:": "VIRTUAL_VIEW", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "null", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -608,6 +608,187 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1735218723", + "SerDe Library:": "null", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test_view", + "View Expanded Text:": "select `array_struct_test_view`.`property_id`, `array_struct_test_view`.`service` from `db1`.`array_struct_test_view`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view_2", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view_2", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", @@ -639,7 +820,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:31 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:04 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -649,7 +830,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166691", + "Table Parameters: transient_lastDdlTime": "1735218724", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -793,7 +974,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -803,7 +984,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -996,7 +1177,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:20 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:53 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", @@ -1006,7 +1187,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1724166680", + "Table Parameters: transient_lastDdlTime": "1735218713", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1158,7 +1339,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:23 UTC 2024", + "CreateTime:": "Thu Dec 26 13:11:56 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -1168,7 +1349,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166683", + "Table Parameters: transient_lastDdlTime": "1735218716", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1339,14 +1520,14 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:02 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", "Table Type:": "MATERIALIZED_VIEW", "Table Parameters: numFiles": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218722", "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", @@ -1519,7 +1700,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Tue Aug 20 15:11:30 UTC 2024", + "CreateTime:": "Thu Dec 26 13:12:03 UTC 2024", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1529,7 +1710,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1724166690", + "Table Parameters: transient_lastDdlTime": "1735218723", "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", @@ -1755,5 +1936,325 @@ "runId": "hive-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view_2` AS select `array_struct_test_view`.`property_id`, `array_struct_test_view`.`service` from `db1`.`array_struct_test_view`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `db1.array_struct_test_view` AS\nSELECT\n `array_struct_test`.`property_id`,\n `array_struct_test`.`service`\nFROM `db1`.`array_struct_test`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),property_id)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),service)" + ], + "confidenceScore": 0.35, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `db1.array_struct_test_view_2` AS\nSELECT\n `array_struct_test_view`.`property_id`,\n `array_struct_test_view`.`service`\nFROM `db1`.`array_struct_test_view`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view_2,PROD),service)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hive" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cdb1.array_struct_test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_setup.sql b/metadata-ingestion/tests/integration/hive/hive_setup.sql index 323a78e24d10b..c027c174c9355 100644 --- a/metadata-ingestion/tests/integration/hive/hive_setup.sql +++ b/metadata-ingestion/tests/integration/hive/hive_setup.sql @@ -42,6 +42,8 @@ select * from test_data; CREATE MATERIALIZED VIEW db1.struct_test_view_materialized as select * from db1.struct_test; CREATE VIEW db1.array_struct_test_view as select * from db1.array_struct_test; +CREATE VIEW db1.array_struct_test_view_2 as select * from db1.array_struct_test_view; + CREATE TABLE IF NOT EXISTS db1.nested_struct_test ( property_id INT, diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index 14b03619de4c1..974d10d535861 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -1550,8 +1550,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `metadata_index_view` AS select `metadata_index`.`id` AS `id`,`metadata_index`.`urn` AS `urn`,`metadata_index`.`path` AS `path`,`metadata_index`.`doubleVal` AS `doubleVal` from `metadata_index`", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `metadata_index_view` AS select `metadata_index`.`id` AS `id`,`metadata_index`.`urn` AS `urn`,`metadata_index`.`path` AS `path`,`metadata_index`.`doubleVal` AS `doubleVal` from `metadata_index`" }, "name": "metadata_index_view", "tags": [] @@ -2701,35 +2701,42 @@ "upstreams": [ { "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", @@ -2740,18 +2747,95 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE ALGORITHM=UNDEFINED\nDEFINER=\"root\"@\"localhost\"\nSQL SECURITY DEFINER VIEW `metadata_index_view` AS\nSELECT\n `metadata_index`.`id` AS `id`,\n `metadata_index`.`urn` AS `urn`,\n `metadata_index`.`path` AS `path`,\n `metadata_index`.`doubleVal` AS `doubleVal`\nFROM `metadata_index`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" } ] } @@ -2762,6 +2846,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mysql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amysql%2Cmetagalaxy.metadata_index_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "glossaryTerm", "entityUrn": "urn:li:glossaryTerm:Email_Address", diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index abd9b2350638a..8cf69535f30f6 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -17,7 +17,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -33,7 +33,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -49,7 +49,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -67,7 +67,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -83,7 +83,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -99,7 +99,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -122,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -138,7 +138,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -154,7 +154,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -172,7 +172,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -193,7 +193,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -209,7 +209,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -272,7 +272,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -290,7 +290,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -315,7 +315,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -331,7 +331,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -394,7 +394,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -412,7 +412,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -437,7 +437,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -453,7 +453,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -470,8 +470,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table" }, "name": "view1", "description": "Some mock comment here ...", @@ -519,7 +519,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -537,7 +537,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -555,7 +555,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -580,7 +580,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -596,7 +596,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -619,7 +619,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -635,7 +635,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -651,7 +651,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -669,7 +669,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -690,7 +690,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -706,7 +706,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -769,7 +769,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -787,7 +787,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -812,7 +812,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -828,7 +828,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -891,7 +891,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -909,7 +909,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -934,7 +934,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -950,7 +950,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -967,8 +967,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table" }, "name": "view1", "description": "Some mock comment here ...", @@ -1016,7 +1016,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -1034,7 +1034,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -1052,7 +1052,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -1077,7 +1077,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -1091,11 +1091,16 @@ "upstreams": [ { "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29" } ], "fineGrainedLineages": [ @@ -1108,7 +1113,8 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN1)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29" }, { "upstreamType": "FIELD_SET", @@ -1119,14 +1125,94 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN2)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_table,PROD),MOCK_COLUMN2)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN2)" } ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } }, @@ -1140,11 +1226,16 @@ "upstreams": [ { "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29" } ], "fineGrainedLineages": [ @@ -1157,7 +1248,8 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN1)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29" }, { "upstreamType": "FIELD_SET", @@ -1168,14 +1260,126 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN2)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_table,PROD),MOCK_COLUMN2)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN2)" } ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2COraDoc.schema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-38ppfw", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json index dc0208586d1a1..d57ea0e21479d 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_out_database.json @@ -17,7 +17,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -33,7 +33,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -49,7 +49,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -67,7 +67,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -83,7 +83,23 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:937a38ee28b69ecae38665c5e842d0ad", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -106,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -122,7 +138,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -138,7 +154,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -156,23 +172,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:937a38ee28b69ecae38665c5e842d0ad", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -193,7 +193,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -209,7 +209,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -272,7 +272,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -290,7 +290,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -315,7 +315,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -331,7 +331,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -394,7 +394,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -412,7 +412,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -437,7 +437,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -453,7 +453,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -470,8 +470,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table" }, "name": "view1", "description": "Some mock comment here ...", @@ -519,7 +519,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -537,7 +537,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -555,7 +555,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -580,7 +580,23 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1965527855ae77f259a8ddea2b8eed2f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -603,7 +619,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -619,7 +635,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -635,7 +651,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -653,23 +669,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1965527855ae77f259a8ddea2b8eed2f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -690,7 +690,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -706,7 +706,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -769,7 +769,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -787,7 +787,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -812,7 +812,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -828,7 +828,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -891,7 +891,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -909,7 +909,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -934,7 +934,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -950,7 +950,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -967,8 +967,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW mock_view AS\n SELECT\n mock_column1,\n mock_column2\n FROM mock_table" }, "name": "view1", "description": "Some mock comment here ...", @@ -1016,7 +1016,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -1034,7 +1034,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -1052,7 +1052,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } }, @@ -1077,7 +1077,309 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD),MOCK_COLUMN1)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD),MOCK_COLUMN2)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN2)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema1.view1,PROD),MOCK_COLUMN2)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD),MOCK_COLUMN1)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD),MOCK_COLUMN2)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW mock_view AS\nSELECT\n mock_column1,\n mock_column2\nFROM mock_table", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,mock_table,PROD),MOCK_COLUMN2)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD),MOCK_COLUMN1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,schema2.view1,PROD),MOCK_COLUMN2)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:oracle" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema1.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aoracle%2Cschema2.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-ss8owb", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json index 21898ca246b65..dea5123e93b14 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a208486b83be39fa411922e07701d984", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0202f800c992262c01ae6bbd5ee313f7" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:a208486b83be39fa411922e07701d984", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a208486b83be39fa411922e07701d984", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0202f800c992262c01ae6bbd5ee313f7" - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:a208486b83be39fa411922e07701d984", @@ -285,6 +285,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a6097853edba03be190d99ece4b307ff" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", @@ -358,22 +374,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a6097853edba03be190d99ece4b307ff" - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", @@ -640,8 +640,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", - "is_view": "True" + "is_view": "True", + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);" }, "name": "metadata_aspect_view", "tags": [] @@ -856,35 +856,105 @@ "upstreams": [ { "auditStamp": { + "time": 1646575200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "SELECT\n metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\nFROM metadata_aspect_v2\nWHERE\n (\n metadata_aspect_v2.version = 0\n )", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1646575200000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" } ] } @@ -894,5 +964,37 @@ "runId": "postgres-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:postgres" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json index fc4a0affac561..e75ea679cecf2 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a6097853edba03be190d99ece4b307ff" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a6097853edba03be190d99ece4b307ff" - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:51904fc8cd5cc729bc630decff284525", @@ -464,8 +464,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", - "is_view": "True" + "is_view": "True", + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);" }, "name": "metadata_aspect_view", "tags": [] @@ -622,35 +622,105 @@ "upstreams": [ { "auditStamp": { + "time": 1646575200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "SELECT\n metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\nFROM metadata_aspect_v2\nWHERE\n (\n metadata_aspect_v2.version = 0\n )", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1646575200000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" } ] } @@ -661,6 +731,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:postgres" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Apostgres%2Cpostgrestest.public.metadata_aspect_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "glossaryTerm", "entityUrn": "urn:li:glossaryTerm:URN", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 72dcda25c1296..0d9386dcda0cd 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "c2d77890-83ba-435f-879b-1c77fa38dd47", + "job_id": "a06cfdca-b65e-42de-8db2-8c21c183c5dd", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-12-05 16:44:43.910000", - "date_modified": "2024-12-05 16:44:44.043000", + "date_created": "2024-12-26 12:03:35.420000", + "date_modified": "2024-12-26 12:03:35.590000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2103,8 +2103,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n" }, "name": "PersonsView", "tags": [] @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-12-05 16:44:43.800000", - "date_modified": "2024-12-05 16:44:43.800000" + "date_created": "2024-12-26 12:03:35.230000", + "date_modified": "2024-12-26 12:03:35.230000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2310,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-12-05 16:44:43.803000", - "date_modified": "2024-12-05 16:44:43.803000" + "date_created": "2024-12-26 12:03:35.237000", + "date_modified": "2024-12-26 12:03:35.237000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -4427,8 +4427,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n" }, "name": "View1", "tags": [] @@ -4891,11 +4891,16 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29" } ] } @@ -4906,6 +4911,73 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW Foo.PersonsView AS\nSELECT\n *\nFROM Foo.Persons", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214618898, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", @@ -4916,35 +4988,105 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),firstname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),lastname)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),lastname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),firstname)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.2, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW FooNew.View1 AS\nSELECT\n LastName,\n FirstName\nFROM FooNew.PersonsNew\nWHERE\n Age > 18", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214618906, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),lastname)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),firstname)" } ] } @@ -4955,6 +5097,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", @@ -5034,5 +5192,37 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CNewData.FooNew.View1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 0df89ff1eb94d..07098f0161fc3 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "c2d77890-83ba-435f-879b-1c77fa38dd47", + "job_id": "a06cfdca-b65e-42de-8db2-8c21c183c5dd", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-12-05 16:44:43.910000", - "date_modified": "2024-12-05 16:44:44.043000", + "date_created": "2024-12-26 12:03:35.420000", + "date_modified": "2024-12-26 12:03:35.590000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2103,8 +2103,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n" }, "name": "PersonsView", "tags": [] @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-12-05 16:44:43.800000", - "date_modified": "2024-12-05 16:44:43.800000" + "date_created": "2024-12-26 12:03:35.230000", + "date_modified": "2024-12-26 12:03:35.230000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2638,11 +2638,16 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29" } ] } @@ -2653,6 +2658,73 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW Foo.PersonsView AS\nSELECT\n *\nFROM Foo.Persons", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214621644, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", @@ -2716,5 +2788,21 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2CDemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index b36188405e7e1..bf30448469c30 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -137,11 +137,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "b8907be7-52f5-4df4-a870-f4fe0679ec45", + "job_id": "a06cfdca-b65e-42de-8db2-8c21c183c5dd", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-12-19 12:34:45.843000", - "date_modified": "2024-12-19 12:34:46.017000", + "date_created": "2024-12-26 12:03:35.420000", + "date_modified": "2024-12-26 12:03:35.590000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2532,8 +2532,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-12-19 12:34:45.660000", - "date_modified": "2024-12-19 12:34:45.660000" + "date_created": "2024-12-26 12:03:35.230000", + "date_modified": "2024-12-26 12:03:35.230000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2577,8 +2577,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-12-19 12:34:45.667000", - "date_modified": "2024-12-19 12:34:45.667000" + "date_created": "2024-12-26 12:03:35.237000", + "date_modified": "2024-12-26 12:03:35.237000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -2968,11 +2968,67 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.demodata.foo.persons,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cmy-instance.DemoData.Foo.PersonsView%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cmy-instance.DemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW Foo.PersonsView AS\nSELECT\n *\nFROM Foo.Persons", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214620908, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cmy-instance.DemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.demodata.foo.persons,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)" } ] } @@ -2983,6 +3039,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cmy-instance.DemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", @@ -3062,5 +3134,21 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cmy-instance.DemoData.Foo.PersonsView%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index ebcadcc11dcbf..ff27989d71de1 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", + "job_id": "a06cfdca-b65e-42de-8db2-8c21c183c5dd", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:58:03.260000", - "date_modified": "2024-11-22 12:58:03.440000", + "date_created": "2024-12-26 12:03:35.420000", + "date_modified": "2024-12-26 12:03:35.590000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2103,8 +2103,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n" }, "name": "PersonsView", "tags": [] @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:58:03.137000", - "date_modified": "2024-11-22 12:58:03.137000" + "date_created": "2024-12-26 12:03:35.230000", + "date_modified": "2024-12-26 12:03:35.230000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2310,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:58:03.140000", - "date_modified": "2024-11-22 12:58:03.140000" + "date_created": "2024-12-26 12:03:35.237000", + "date_modified": "2024-12-26 12:03:35.237000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -4427,8 +4427,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", - "is_view": "True" + "is_view": "True", + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n" }, "name": "View1", "tags": [] @@ -4891,57 +4891,66 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29" } ] } @@ -4952,6 +4961,97 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW Foo.PersonsView AS\nSELECT\n *\nFROM Foo.Persons", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214622805, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", @@ -4962,35 +5062,105 @@ "upstreams": [ { "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29" } ], "fineGrainedLineages": [ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29" }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW FooNew.View1 AS\nSELECT\n LastName,\n FirstName\nFROM FooNew.PersonsNew\nWHERE\n Age > 18", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1735214622810, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" } ] } @@ -5001,6 +5171,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -5151,5 +5337,37 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cdemodata.foo.personsview%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Amssql%2Cnewdata.foonew.view1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json index 6745268ea2c24..fe85b6b4396fb 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -94,6 +94,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", @@ -169,22 +185,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-hive-instance-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", @@ -246,7 +246,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1724180599" + "transient_lastddltime": "1735206396" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -507,7 +507,7 @@ "numrows": "3", "rawdatasize": "94", "totalsize": "97", - "transient_lastddltime": "1724180605" + "transient_lastddltime": "1735206403" }, "name": "classification_test", "tags": [] @@ -766,7 +766,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "map_test", "tags": [] @@ -993,7 +993,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "nested_struct_test", "tags": [] @@ -1264,7 +1264,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1724180591" + "transient_lastddltime": "1735206384" }, "name": "pokes", "tags": [] @@ -1499,7 +1499,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180595" + "transient_lastddltime": "1735206390" }, "name": "struct_test", "tags": [] @@ -1750,7 +1750,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1724180601" + "transient_lastddltime": "1735206399" }, "name": "struct_test_view_materialized", "tags": [] @@ -2004,7 +2004,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180595" + "transient_lastddltime": "1735206390" }, "name": "_test_table_underscore", "tags": [] @@ -2227,7 +2227,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "union_test", "tags": [] @@ -2529,9 +2529,9 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1724180602", - "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", - "is_view": "True" + "transient_lastddltime": "1735206400", + "is_view": "True", + "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"" }, "name": "array_struct_test_view", "tags": [] @@ -2758,11 +2758,16 @@ "upstreams": [ { "auditStamp": { + "time": 1632398400000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29" } ], "fineGrainedLineages": [ @@ -2775,7 +2780,8 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),property_id)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", @@ -2786,7 +2792,8 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),service)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29" } ] } @@ -2797,6 +2804,85 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "SELECT\n \"property_id\",\n \"service\"\nFROM \"db1\".\"array_struct_test\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1632398400000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),service)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", @@ -2956,5 +3042,21 @@ "runId": "trino-hive-instance-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Cproduction_warehouse.hivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index 34acf6a6e369b..d68f014c93ac8 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -87,6 +87,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", @@ -160,22 +176,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" - } - }, - "systemMetadata": { - "lastObserved": 1632398400000, - "runId": "trino-hive-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", @@ -233,7 +233,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1724180599" + "transient_lastddltime": "1735206396" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -473,7 +473,7 @@ "numrows": "3", "rawdatasize": "94", "totalsize": "97", - "transient_lastddltime": "1724180605" + "transient_lastddltime": "1735206403" }, "name": "classification_test", "tags": [] @@ -755,7 +755,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "map_test", "tags": [] @@ -961,7 +961,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "nested_struct_test", "tags": [] @@ -1211,7 +1211,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1724180591" + "transient_lastddltime": "1735206384" }, "name": "pokes", "tags": [] @@ -1425,7 +1425,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180595" + "transient_lastddltime": "1735206390" }, "name": "struct_test", "tags": [] @@ -1655,7 +1655,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1724180601" + "transient_lastddltime": "1735206399" }, "name": "struct_test_view_materialized", "tags": [] @@ -1888,7 +1888,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180595" + "transient_lastddltime": "1735206390" }, "name": "_test_table_underscore", "tags": [] @@ -2090,7 +2090,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1724180602" + "transient_lastddltime": "1735206400" }, "name": "union_test", "tags": [] @@ -2371,9 +2371,9 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1724180602", - "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", - "is_view": "True" + "transient_lastddltime": "1735206400", + "is_view": "True", + "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"" }, "name": "array_struct_test_view", "tags": [] @@ -2579,11 +2579,16 @@ "upstreams": [ { "auditStamp": { + "time": 1632398400000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { "time": 0, - "actor": "urn:li:corpuser:unknown" + "actor": "urn:li:corpuser:_ingestion" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", - "type": "VIEW" + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29" } ], "fineGrainedLineages": [ @@ -2596,7 +2601,8 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29" }, { "upstreamType": "FIELD_SET", @@ -2607,7 +2613,71 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" ], - "confidenceScore": 1.0 + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "SELECT\n \"property_id\",\n \"service\"\nFROM \"db1\".\"array_struct_test\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1632398400000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" } ] } @@ -2618,6 +2688,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", @@ -2778,6 +2864,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atrino%2Chivedb.db1.array_struct_test_view%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "glossaryTerm", "entityUrn": "urn:li:glossaryTerm:Age", diff --git a/metadata-ingestion/tests/unit/sdk/test_mce_builder.py b/metadata-ingestion/tests/unit/sdk/test_mce_builder.py index d7c84f7863b40..3bdbf07bf28b7 100644 --- a/metadata-ingestion/tests/unit/sdk/test_mce_builder.py +++ b/metadata-ingestion/tests/unit/sdk/test_mce_builder.py @@ -1,3 +1,5 @@ +from datetime import datetime, timezone + import datahub.emitter.mce_builder as builder from datahub.metadata.schema_classes import ( DataFlowInfoClass, @@ -55,3 +57,18 @@ def test_make_group_urn() -> None: assert ( builder.make_group_urn("urn:li:corpuser:someUser") == "urn:li:corpuser:someUser" ) + + +def test_ts_millis() -> None: + assert builder.make_ts_millis(None) is None + assert builder.parse_ts_millis(None) is None + + assert ( + builder.make_ts_millis(datetime(2024, 1, 1, 2, 3, 4, 5, timezone.utc)) + == 1704074584000 + ) + + # We only have millisecond precision, don't support microseconds. + ts = datetime.now(timezone.utc).replace(microsecond=0) + ts_millis = builder.make_ts_millis(ts) + assert builder.parse_ts_millis(ts_millis) == ts diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py index 98d62d5643ff2..b49f715312913 100644 --- a/metadata-ingestion/tests/unit/serde/test_codegen.py +++ b/metadata-ingestion/tests/unit/serde/test_codegen.py @@ -6,11 +6,10 @@ import pytest import typing_inspect -from datahub.emitter.enum_helpers import get_enum_options +from datahub.emitter.mce_builder import ALL_ENV_TYPES from datahub.metadata.schema_classes import ( ASPECT_CLASSES, KEY_ASPECTS, - FabricTypeClass, FineGrainedLineageClass, MetadataChangeEventClass, OwnershipClass, @@ -164,8 +163,7 @@ def _err(msg: str) -> None: def test_enum_options(): # This is mainly a sanity check to ensure that it doesn't do anything too crazy. - env_options = get_enum_options(FabricTypeClass) - assert "PROD" in env_options + assert "PROD" in ALL_ENV_TYPES def test_urn_types() -> None: diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 516a77d59d50b..88bbfa2e10c4c 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -102,7 +102,7 @@ dependencies { testImplementation(testFixtures(project(":entity-registry"))) testAnnotationProcessor externalDependency.lombok - + testImplementation project(':mock-entity-registry') constraints { implementation(externalDependency.log4jCore) { because("previous versions are vulnerable to CVE-2021-45105") diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/schema/SchemaFieldBusinessAttributeChangeEvent.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/schema/SchemaFieldBusinessAttributeChangeEvent.java new file mode 100644 index 0000000000000..1f1252e208545 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/schema/SchemaFieldBusinessAttributeChangeEvent.java @@ -0,0 +1,38 @@ +package com.linkedin.metadata.timeline.data.dataset.schema; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.timeline.data.ChangeCategory; +import com.linkedin.metadata.timeline.data.ChangeEvent; +import com.linkedin.metadata.timeline.data.ChangeOperation; +import com.linkedin.metadata.timeline.data.SemanticChangeType; +import lombok.Builder; + +public class SchemaFieldBusinessAttributeChangeEvent extends ChangeEvent { + @Builder(builderMethodName = "schemaFieldBusinessAttributeChangeEventBuilder") + public SchemaFieldBusinessAttributeChangeEvent( + String entityUrn, + ChangeCategory category, + ChangeOperation operation, + String modifier, + AuditStamp auditStamp, + SemanticChangeType semVerChange, + String description, + Urn parentUrn, + Urn businessAttributeUrn, + Urn datasetUrn) { + super( + entityUrn, + category, + operation, + modifier, + ImmutableMap.of( + "parentUrn", parentUrn.toString(), + "businessAttributeUrn", businessAttributeUrn.toString(), + "datasetUrn", datasetUrn.toString()), + auditStamp, + semVerChange, + description); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGenerator.java new file mode 100644 index 0000000000000..69d20f2f41bd5 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGenerator.java @@ -0,0 +1,98 @@ +package com.linkedin.metadata.timeline.eventgenerator; + +import com.linkedin.businessattribute.BusinessAttributeAssociation; +import com.linkedin.businessattribute.BusinessAttributes; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.timeline.data.ChangeCategory; +import com.linkedin.metadata.timeline.data.ChangeEvent; +import com.linkedin.metadata.timeline.data.ChangeOperation; +import com.linkedin.metadata.timeline.data.SemanticChangeType; +import com.linkedin.metadata.timeline.data.dataset.schema.SchemaFieldBusinessAttributeChangeEvent; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class BusinessAttributesChangeEventGenerator + extends EntityChangeEventGenerator { + + private static final String BUSINESS_ATTRIBUTE_ADDED_FORMAT = + "BusinessAttribute '%s' added to entity '%s'."; + private static final String BUSINESS_ATTRIBUTE_REMOVED_FORMAT = + "BusinessAttribute '%s' removed from entity '%s'."; + + @Override + public List getChangeEvents( + @Nonnull Urn urn, + @Nonnull String entityName, + @Nonnull String aspectName, + @Nonnull Aspect from, + @Nonnull Aspect to, + @Nonnull AuditStamp auditStamp) { + log.debug( + "Calling BusinessAttributesChangeEventGenerator for entity {} and aspect {}", + entityName, + aspectName); + return computeDiff(urn, entityName, aspectName, from.getValue(), to.getValue(), auditStamp); + } + + private List computeDiff( + Urn urn, + String entityName, + String aspectName, + BusinessAttributes previousValue, + BusinessAttributes newValue, + AuditStamp auditStamp) { + List changeEvents = new ArrayList<>(); + + BusinessAttributeAssociation previousAssociation = + previousValue != null ? previousValue.getBusinessAttribute() : null; + BusinessAttributeAssociation newAssociation = + newValue != null ? newValue.getBusinessAttribute() : null; + + if (Objects.nonNull(previousAssociation) && Objects.isNull(newAssociation)) { + changeEvents.add( + createChangeEvent( + previousAssociation, + urn, + ChangeOperation.REMOVE, + BUSINESS_ATTRIBUTE_REMOVED_FORMAT, + auditStamp)); + + } else if (Objects.isNull(previousAssociation) && Objects.nonNull(newAssociation)) { + changeEvents.add( + createChangeEvent( + newAssociation, + urn, + ChangeOperation.ADD, + BUSINESS_ATTRIBUTE_ADDED_FORMAT, + auditStamp)); + } + return changeEvents; + } + + private ChangeEvent createChangeEvent( + BusinessAttributeAssociation businessAttributeAssociation, + Urn entityUrn, + ChangeOperation changeOperation, + String format, + AuditStamp auditStamp) { + return SchemaFieldBusinessAttributeChangeEvent.schemaFieldBusinessAttributeChangeEventBuilder() + .entityUrn(entityUrn.toString()) + .category(ChangeCategory.BUSINESS_ATTRIBUTE) + .operation(changeOperation) + .modifier(businessAttributeAssociation.getBusinessAttributeUrn().toString()) + .auditStamp(auditStamp) + .semVerChange(SemanticChangeType.MINOR) + .description( + String.format( + format, businessAttributeAssociation.getBusinessAttributeUrn().getId(), entityUrn)) + .parentUrn(entityUrn) + .businessAttributeUrn(businessAttributeAssociation.getBusinessAttributeUrn()) + .datasetUrn(entityUrn.getIdAsUrn()) + .build(); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGeneratorTest.java new file mode 100644 index 0000000000000..fb4c5ca3f9688 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/BusinessAttributesChangeEventGeneratorTest.java @@ -0,0 +1,124 @@ +package com.linkedin.metadata.timeline.eventgenerator; + +import static org.testng.AssertJUnit.assertEquals; + +import com.linkedin.businessattribute.BusinessAttributeAssociation; +import com.linkedin.businessattribute.BusinessAttributes; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.BusinessAttributeUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.ByteString; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.timeline.data.ChangeEvent; +import com.linkedin.metadata.timeline.data.ChangeOperation; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.SystemMetadata; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import mock.MockEntitySpec; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +public class BusinessAttributesChangeEventGeneratorTest extends AbstractTestNGSpringContextTests { + + private static Urn getSchemaFieldUrn() throws URISyntaxException { + return Urn.createFromString( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD),user_id)"); + } + + private static final String BUSINESS_ATTRIBUTE_URN = + "urn:li:businessAttribute:cypressTestAttribute"; + + private static AuditStamp getTestAuditStamp() throws URISyntaxException { + return new AuditStamp() + .setActor(Urn.createFromString("urn:li:corpuser:__datahub_system")) + .setTime(1683829509553L); + } + + private static Aspect getBusinessAttributes( + BusinessAttributeAssociation association) { + return new Aspect<>( + new BusinessAttributes().setBusinessAttribute(association), new SystemMetadata()); + } + + private static Aspect getNullBusinessAttributes() { + MockEntitySpec mockEntitySpec = new MockEntitySpec("schemaField"); + BusinessAttributes businessAttributes = new BusinessAttributes(); + final AspectSpec aspectSpec = + mockEntitySpec.createAspectSpec(businessAttributes, Constants.BUSINESS_ATTRIBUTE_ASPECT); + final RecordTemplate nullAspect = + GenericRecordUtils.deserializeAspect( + ByteString.copyString("{}", StandardCharsets.UTF_8), "application/json", aspectSpec); + return new Aspect(nullAspect, new SystemMetadata()); + } + + @Test + public void testBusinessAttributeAddition() throws Exception { + BusinessAttributesChangeEventGenerator businessAttributesChangeEventGenerator = + new BusinessAttributesChangeEventGenerator(); + + Urn urn = getSchemaFieldUrn(); + String entity = "schemaField"; + String aspect = "businessAttributes"; + AuditStamp auditStamp = getTestAuditStamp(); + + Aspect from = getNullBusinessAttributes(); + Aspect to = + getBusinessAttributes( + new BusinessAttributeAssociation() + .setBusinessAttributeUrn(new BusinessAttributeUrn(BUSINESS_ATTRIBUTE_URN))); + + List actual = + businessAttributesChangeEventGenerator.getChangeEvents( + urn, entity, aspect, from, to, auditStamp); + assertEquals(1, actual.size()); + assertEquals(ChangeOperation.ADD.name(), actual.get(0).getOperation().name()); + assertEquals(getSchemaFieldUrn(), Urn.createFromString(actual.get(0).getEntityUrn())); + } + + @Test + public void testBusinessAttributeRemoval() throws Exception { + BusinessAttributesChangeEventGenerator test = new BusinessAttributesChangeEventGenerator(); + + Urn urn = getSchemaFieldUrn(); + String entity = "schemaField"; + String aspect = "businessAttributes"; + AuditStamp auditStamp = getTestAuditStamp(); + + Aspect from = + getBusinessAttributes( + new BusinessAttributeAssociation() + .setBusinessAttributeUrn(new BusinessAttributeUrn(BUSINESS_ATTRIBUTE_URN))); + Aspect to = getNullBusinessAttributes(); + + List actual = test.getChangeEvents(urn, entity, aspect, from, to, auditStamp); + assertEquals(1, actual.size()); + assertEquals(ChangeOperation.REMOVE.name(), actual.get(0).getOperation().name()); + assertEquals(getSchemaFieldUrn(), Urn.createFromString(actual.get(0).getEntityUrn())); + } + + @Test + public void testNoChange() throws Exception { + BusinessAttributesChangeEventGenerator test = new BusinessAttributesChangeEventGenerator(); + + Urn urn = getSchemaFieldUrn(); + String entity = "schemaField"; + String aspect = "businessAttributes"; + AuditStamp auditStamp = getTestAuditStamp(); + + Aspect from = + getBusinessAttributes( + new BusinessAttributeAssociation() + .setBusinessAttributeUrn(new BusinessAttributeUrn(BUSINESS_ATTRIBUTE_URN))); + Aspect to = + getBusinessAttributes( + new BusinessAttributeAssociation() + .setBusinessAttributeUrn(new BusinessAttributeUrn(BUSINESS_ATTRIBUTE_URN))); + + List actual = test.getChangeEvents(urn, entity, aspect, from, to, auditStamp); + assertEquals(0, actual.size()); + } +} diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java index de570cc91b2fe..17e34f151ae01 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java @@ -1,7 +1,5 @@ package com.linkedin.metadata.kafka.hook.event; -import static com.linkedin.metadata.Constants.SCHEMA_FIELD_ENTITY_NAME; - import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; @@ -27,6 +25,7 @@ import com.linkedin.platform.event.v1.Parameters; import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; @@ -65,6 +64,7 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { Constants.ASSERTION_RUN_EVENT_ASPECT_NAME, Constants.DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, Constants.BUSINESS_ATTRIBUTE_INFO_ASPECT_NAME, + Constants.BUSINESS_ATTRIBUTE_ASPECT, // Entity Lifecycle Event Constants.DATASET_KEY_ASPECT_NAME, @@ -83,13 +83,12 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook { private static final Set SUPPORTED_OPERATIONS = ImmutableSet.of("CREATE", "UPSERT", "DELETE"); - private static final Set ENTITY_EXCLUSIONS = ImmutableSet.of(SCHEMA_FIELD_ENTITY_NAME); - private final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry; private final OperationContext systemOperationContext; private final SystemEntityClient systemEntityClient; private final Boolean isEnabled; @Getter private final String consumerGroupSuffix; + private final List entityExclusions; @Autowired public EntityChangeEventGeneratorHook( @@ -98,13 +97,16 @@ public EntityChangeEventGeneratorHook( final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, @Nonnull final SystemEntityClient entityClient, @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled, - @Nonnull @Value("${entityChangeEvents.consumerGroupSuffix}") String consumerGroupSuffix) { + @Nonnull @Value("${entityChangeEvents.consumerGroupSuffix}") String consumerGroupSuffix, + @Nonnull @Value("#{'${entityChangeEvents.entityExclusions}'.split(',')}") + List entityExclusions) { this.systemOperationContext = systemOperationContext; this.entityChangeEventGeneratorRegistry = Objects.requireNonNull(entityChangeEventGeneratorRegistry); this.systemEntityClient = Objects.requireNonNull(entityClient); this.isEnabled = isEnabled; this.consumerGroupSuffix = consumerGroupSuffix; + this.entityExclusions = entityExclusions; } @VisibleForTesting @@ -113,7 +115,13 @@ public EntityChangeEventGeneratorHook( @Nonnull final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry, @Nonnull final SystemEntityClient entityClient, @Nonnull Boolean isEnabled) { - this(systemOperationContext, entityChangeEventGeneratorRegistry, entityClient, isEnabled, ""); + this( + systemOperationContext, + entityChangeEventGeneratorRegistry, + entityClient, + isEnabled, + "", + Collections.emptyList()); } @Override @@ -202,7 +210,7 @@ private List generateChangeEvents( private boolean isEligibleForProcessing(final MetadataChangeLog log) { return SUPPORTED_OPERATIONS.contains(log.getChangeType().toString()) && SUPPORTED_ASPECT_NAMES.contains(log.getAspectName()) - && !ENTITY_EXCLUSIONS.contains(log.getEntityType()); + && !entityExclusions.contains(log.getEntityType()); } private void emitPlatformEvent( diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index b997bc108e4ba..f6fa4a37fdadb 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -467,6 +467,7 @@ featureFlags: entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} consumerGroupSuffix: ${ECE_CONSUMER_GROUP_SUFFIX:} + entityExclusions: ${ECE_ENTITY_EXCLUSIONS:schemaField} # provides a comma separated list of entities to exclude from the ECE hook views: enabled: ${VIEWS_ENABLED:true} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/eventgenerator/EntityChangeEventGeneratorRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/eventgenerator/EntityChangeEventGeneratorRegistryFactory.java index cd8eb4f1218db..10770b83ad881 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/eventgenerator/EntityChangeEventGeneratorRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeline/eventgenerator/EntityChangeEventGeneratorRegistryFactory.java @@ -6,6 +6,7 @@ import com.linkedin.metadata.timeline.eventgenerator.AssertionRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.BusinessAttributeAssociationChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.BusinessAttributeInfoChangeEventGenerator; +import com.linkedin.metadata.timeline.eventgenerator.BusinessAttributesChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DataProcessInstanceRunEventChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DatasetPropertiesChangeEventGenerator; import com.linkedin.metadata.timeline.eventgenerator.DeprecationChangeEventGenerator; @@ -59,6 +60,7 @@ protected EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry( BUSINESS_ATTRIBUTE_INFO_ASPECT_NAME, new BusinessAttributeInfoChangeEventGenerator()); registry.register( BUSINESS_ATTRIBUTE_ASSOCIATION, new BusinessAttributeAssociationChangeEventGenerator()); + registry.register(BUSINESS_ATTRIBUTE_ASPECT, new BusinessAttributesChangeEventGenerator()); // Entity Lifecycle Differs registry.register(DATASET_KEY_ASPECT_NAME, new EntityKeyChangeEventGenerator<>()); diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index ec8188ebf5f4d..1d209b4ba8219 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -22,7 +22,9 @@ else echo "datahub:datahub" > ~/.datahub/plugins/frontend/auth/user.props python3 -m venv venv + set +x source venv/bin/activate + set -x python -m pip install --upgrade 'uv>=0.1.10' uv pip install -r requirements.txt fi