From d0d09a09f8b70054c68f26373663d0424bd81413 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:55:27 +0530 Subject: [PATCH 01/50] fix(ingest): ignore irrelevant urns from % change computation (#11583) --- .../source/state/entity_removal_state.py | 26 ++++++++++++++++++- .../state/stale_entity_removal_handler.py | 10 +++---- .../test_stale_entity_removal_handler.py | 18 +++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index f011aa7bdd19e..318395d4e66b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -8,6 +8,11 @@ from datahub.utilities.dedup_list import deduplicate_list from datahub.utilities.urns.urn import guess_entity_type +STATEFUL_INGESTION_IGNORED_ENTITY_TYPES = { + "dataProcessInstance", + "query", +} + def pydantic_state_migrator(mapping: Dict[str, str]) -> classmethod: # mapping would be something like: @@ -127,8 +132,11 @@ def get_percent_entities_changed( :param old_checkpoint_state: the old checkpoint state to compute the relative change percent against. 
:return: (1-|intersection(self, old_checkpoint_state)| / |old_checkpoint_state|) * 100.0 """ + + old_urns_filtered = filter_ignored_entity_types(old_checkpoint_state.urns) + return compute_percent_entities_changed( - new_entities=self.urns, old_entities=old_checkpoint_state.urns + new_entities=self.urns, old_entities=old_urns_filtered ) def urn_count(self) -> int: @@ -153,3 +161,19 @@ def _get_entity_overlap_and_cardinalities( new_set = set(new_entities) old_set = set(old_entities) return len(new_set.intersection(old_set)), len(old_set), len(new_set) + + +def filter_ignored_entity_types(urns: List[str]) -> List[str]: + # We previously stored ignored entity urns (e.g.dataProcessInstance) in state. + # For smoother transition from old checkpoint state, without requiring explicit + # setting of `fail_safe_threshold` due to removal of irrelevant urns from new state, + # here, we would ignore irrelevant urns from percentage entities changed computation + # This special handling can be removed after few months. 
+ return [ + urn + for urn in urns + if not any( + urn.startswith(f"urn:li:{entityType}") + for entityType in STATEFUL_INGESTION_IGNORED_ENTITY_TYPES + ) + ] diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index 9d77e13a0f3c2..d4fcbf09924e9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -11,7 +11,10 @@ from datahub.ingestion.api.ingestion_job_checkpointing_provider_base import JobId from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.state.checkpoint import Checkpoint -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState +from datahub.ingestion.source.state.entity_removal_state import ( + STATEFUL_INGESTION_IGNORED_ENTITY_TYPES, + GenericCheckpointState, +) from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionConfig, StatefulIngestionConfigBase, @@ -27,11 +30,6 @@ logger: logging.Logger = logging.getLogger(__name__) -STATEFUL_INGESTION_IGNORED_ENTITY_TYPES = { - "dataProcessInstance", - "query", -} - class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig): """ diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py index 62a42b954daf8..be2d8bac12e38 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py @@ -4,6 +4,7 @@ from datahub.ingestion.source.state.entity_removal_state import ( compute_percent_entities_changed, + filter_ignored_entity_types, ) EntList = List[str] @@ -46,3 +47,20 
@@ def test_change_percent( new_entities=new_entities, old_entities=old_entities ) assert actual_percent_change == expected_percent_change + + +def test_filter_ignored_entity_types(): + + assert filter_ignored_entity_types( + [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + "urn:li:dataProcessInstance:478810e859f870a54f72c681f41af619", + "urn:li:query:query1", + ] + ) == [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + ] From 59a2f708580459433fe2e61cb96433f04a79c564 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:24:50 +0530 Subject: [PATCH 02/50] feat(ingest/powerbi): fix subTypes and add workspace_type_filter (#11523) Co-authored-by: Harshal Sheth --- .../docs/sources/powerbi/powerbi_pre.md | 4 +- .../ingestion/source/common/subtypes.py | 5 +- .../ingestion/source/powerbi/config.py | 27 +- .../ingestion/source/powerbi/powerbi.py | 80 +- .../powerbi/rest_api_wrapper/data_classes.py | 9 +- .../powerbi/rest_api_wrapper/data_resolver.py | 105 +- .../powerbi/rest_api_wrapper/powerbi_api.py | 73 +- .../powerbi/golden_test_admin_only.json | 73 +- .../integration/powerbi/golden_test_cll.json | 30 +- .../powerbi/golden_test_container.json | 1480 ++++++++++++++--- .../golden_test_disabled_ownership.json | 27 +- .../powerbi/golden_test_endorsement.json | 27 +- .../golden_test_independent_datasets.json | 15 +- .../powerbi/golden_test_ingest.json | 27 +- .../golden_test_ingest_patch_disabled.json | 27 +- .../powerbi/golden_test_lineage.json | 27 +- .../golden_test_lower_case_urn_ingest.json | 27 +- ..._config_and_modified_since_admin_only.json | 125 +- 
.../powerbi/golden_test_personal_ingest.json | 329 ++++ .../golden_test_platform_instance_ingest.json | 27 +- .../powerbi/golden_test_profiling.json | 7 +- .../powerbi/golden_test_report.json | 725 +++++++- .../golden_test_scan_all_workspaces.json | 27 +- ...lden_test_server_to_platform_instance.json | 27 +- .../powerbi/test_admin_only_api.py | 15 +- .../tests/integration/powerbi/test_powerbi.py | 208 ++- .../integration/powerbi/test_profiling.py | 12 +- .../powerbi/test_stateful_ingestion.py | 21 +- 28 files changed, 2870 insertions(+), 716 deletions(-) create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index b581e5fc8f70d..f2745d5e77f49 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -18,9 +18,11 @@ | `Report.webUrl` | `Chart.externalUrl` | | `Workspace` | `Container` | | `Report` | `Dashboard` | +| `PaginatedReport` | `Dashboard` | | `Page` | `Chart` | -If Tile is created from report then Chart.externalUrl is set to Report.webUrl. +- If `Tile` is created from report then `Chart.externalUrl` is set to Report.webUrl. +- The `Page` is unavailable for PowerBI PaginatedReport. 
## Lineage diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 86c1c8db11b05..b6aa8c1f5f1f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -14,7 +14,6 @@ class DatasetSubTypes(StrEnum): ELASTIC_DATASTREAM = "Datastream" SALESFORCE_CUSTOM_OBJECT = "Custom Object" SALESFORCE_STANDARD_OBJECT = "Object" - POWERBI_DATASET_TABLE = "PowerBI Dataset Table" QLIK_DATASET = "Qlik Dataset" BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot" SHARDED_TABLE = "Sharded Table" @@ -48,8 +47,8 @@ class BIContainerSubTypes(StrEnum): LOOKML_PROJECT = "LookML Project" LOOKML_MODEL = "LookML Model" TABLEAU_WORKBOOK = "Workbook" - POWERBI_WORKSPACE = "Workspace" - POWERBI_DATASET = "PowerBI Dataset" + POWERBI_DATASET = "Semantic Model" + POWERBI_DATASET_TABLE = "Table" QLIK_SPACE = "Qlik Space" QLIK_APP = "Qlik App" SIGMA_WORKSPACE = "Sigma Workspace" diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 967dd5d81112d..522639a160781 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, field as dataclass_field from enum import Enum -from typing import Dict, List, Optional, Union +from typing import Dict, List, Literal, Optional, Union import pydantic from pydantic import validator @@ -47,6 +47,7 @@ class Constant: WORKSPACE_ID = "workspaceId" DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}" DATASET_EXECUTE_QUERIES = "DATASET_EXECUTE_QUERIES_POST" + GET_WORKSPACE_APP = "GET_WORKSPACE_APP" DATASET_ID = "datasetId" REPORT_ID = "reportId" SCAN_ID = "ScanId" @@ -118,6 +119,15 @@ class Constant: CHART_COUNT = "chartCount" 
WORKSPACE_NAME = "workspaceName" DATASET_WEB_URL = "datasetWebUrl" + TYPE = "type" + REPORT_TYPE = "reportType" + LAST_UPDATE = "lastUpdate" + APP_ID = "appId" + REPORTS = "reports" + ORIGINAL_REPORT_OBJECT_ID = "originalReportObjectId" + APP_SUB_TYPE = "App" + STATE = "state" + ACTIVE = "Active" @dataclass @@ -273,7 +283,8 @@ class PowerBiDashboardSourceConfig( # PowerBi workspace identifier workspace_id_pattern: AllowDenyPattern = pydantic.Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns to filter PowerBI workspaces in ingestion", + description="Regex patterns to filter PowerBI workspaces in ingestion." + " Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.", ) # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI @@ -340,7 +351,7 @@ class PowerBiDashboardSourceConfig( ) modified_since: Optional[str] = pydantic.Field( default=None, - description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludePersonalWorkspaces and excludeInActiveWorkspaces limit to last 30 days", + description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludeInActiveWorkspaces limit to last 30 days", ) extract_dashboards: bool = pydantic.Field( default=True, @@ -445,6 +456,16 @@ class PowerBiDashboardSourceConfig( description="Patch dashboard metadata", ) + workspace_type_filter: List[ + Literal[ + "Workspace", "PersonalGroup", "Personal", "AdminWorkspace", "AdminInsights" + ] + ] = pydantic.Field( + default=["Workspace"], + description="Ingest the metadata of the workspace where the workspace type corresponds to the specified workspace_type_filter." + " Note: This field works in conjunction with 'workspace_id_pattern'. 
Both must be matched for a workspace to be processed.", + ) + @root_validator(skip_on_failure=True) def validate_extract_column_level_lineage(cls, values: Dict) -> Dict: flags = [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 065bbac9e9645..f5c0aedb329cd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -34,7 +34,6 @@ from datahub.ingestion.source.common.subtypes import ( BIAssetSubTypes, BIContainerSubTypes, - DatasetSubTypes, ) from datahub.ingestion.source.powerbi.config import ( Constant, @@ -142,9 +141,7 @@ def assets_urn_to_lowercase(self, value): def new_mcp( self, - entity_type, entity_urn, - aspect_name, aspect, change_type=ChangeTypeClass.UPSERT, ): @@ -152,10 +149,8 @@ def new_mcp( Create MCP """ return MetadataChangeProposalWrapper( - entityType=entity_type, changeType=change_type, entityUrn=entity_urn, - aspectName=aspect_name, aspect=aspect, ) @@ -176,9 +171,7 @@ def extract_dataset_schema( ) -> List[MetadataChangeProposalWrapper]: schema_metadata = self.to_datahub_schema(table) schema_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.SCHEMA_METADATA, aspect=schema_metadata, ) return [schema_mcp] @@ -409,9 +402,7 @@ def to_datahub_dataset( viewLanguage="m_query", ) view_prop_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.VIEW_PROPERTIES, aspect=view_properties, ) dataset_mcps.extend([view_prop_mcp]) @@ -425,30 +416,23 @@ def to_datahub_dataset( ) info_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.DATASET_PROPERTIES, aspect=ds_properties, ) # Remove status mcp status_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) 
if self.__config.extract_dataset_schema: dataset_mcps.extend(self.extract_dataset_schema(table, ds_urn)) subtype_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.SUBTYPES, aspect=SubTypesClass( typeNames=[ - DatasetSubTypes.POWERBI_DATASET_TABLE, - DatasetSubTypes.VIEW, + BIContainerSubTypes.POWERBI_DATASET_TABLE, ] ), ) @@ -464,9 +448,7 @@ def to_datahub_dataset( # Dashboard owner MCP ownership = OwnershipClass(owners=[owner_class]) owner_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) dataset_mcps.extend([owner_mcp]) @@ -606,17 +588,13 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict: ) info_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_INFO, aspect=chart_info_instance, ) # removed status mcp status_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -633,18 +611,14 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict: # Explicitly emitting this aspect isn't necessary, but we do it here to ensure that # the old, bad data gets overwritten. 
chart_key_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_KEY, aspect=ChartUrn.from_string(chart_urn).to_key_aspect(), ) # Browse path browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) result_mcps = [ @@ -710,17 +684,13 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: ) info_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_INFO, aspect=dashboard_info_cls, ) # removed status mcp removed_status_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -732,9 +702,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: # Dashboard key dashboard_key_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_KEY, aspect=dashboard_key_cls, ) @@ -750,9 +718,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: # Dashboard owner MCP ownership = OwnershipClass(owners=owners) owner_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) @@ -761,9 +727,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: paths=[f"/{Constant.PLATFORM_NAME}/{dashboard.workspace_name}"] ) browse_path_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) @@ -827,7 +791,7 @@ def generate_container_for_workspace( container_work_units = gen_containers( container_key=self.workspace_key, name=workspace.name, - sub_types=[BIContainerSubTypes.POWERBI_WORKSPACE], + sub_types=[workspace.type], ) return 
container_work_units @@ -858,9 +822,7 @@ def append_tag_mcp( ) -> None: if self.__config.extract_endorsements_to_tags and tags: tags_mcp = self.new_mcp( - entity_type=entity_type, entity_urn=entity_urn, - aspect_name=Constant.GLOBAL_TAGS, aspect=self.transform_tags(tags), ) list_of_mcps.append(tags_mcp) @@ -883,9 +845,7 @@ def to_datahub_user( user_key = CorpUserKeyClass(username=user.id) user_key_mcp = self.new_mcp( - entity_type=Constant.CORP_USER, entity_urn=user_urn, - aspect_name=Constant.CORP_USER_KEY, aspect=user_key, ) @@ -1028,17 +988,13 @@ def to_chart_mcps( ) info_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_INFO, aspect=chart_info_instance, ) # removed status mcp status_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) # Subtype mcp @@ -1052,9 +1008,7 @@ def to_chart_mcps( # Browse path browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) list_of_mcps = [info_mcp, status_mcp, subtype_mcp, browse_path_mcp] @@ -1105,17 +1059,13 @@ def report_to_dashboard( ) info_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_INFO, aspect=dashboard_info_cls, ) # removed status mcp removed_status_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -1127,9 +1077,7 @@ def report_to_dashboard( # Dashboard key dashboard_key_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_KEY, aspect=dashboard_key_cls, ) # Report Ownership @@ -1144,9 +1092,7 @@ def report_to_dashboard( # Report owner MCP ownership = OwnershipClass(owners=owners) owner_mcp = self.new_mcp( 
- entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) @@ -1155,17 +1101,13 @@ def report_to_dashboard( paths=[f"/{Constant.PLATFORM_NAME}/{workspace.name}"] ) browse_path_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) sub_type_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=SubTypesClass.ASPECT_NAME, - aspect=SubTypesClass(typeNames=[Constant.REPORT_TYPE_NAME]), + aspect=SubTypesClass(typeNames=[report.type.value]), ) list_of_mcps = [ @@ -1203,7 +1145,7 @@ def report_to_datahub_work_units( logger.debug(f"Converting report={report.name} to datahub dashboard") # Convert user to CorpUser user_mcps = self.to_datahub_users(report.users) - # Convert pages to charts. A report has single dataset and same dataset used in pages to create visualization + # Convert pages to charts. A report has a single dataset and the same dataset used in pages to create visualization ds_mcps = self.to_datahub_dataset(report.dataset, workspace) chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps) @@ -1267,7 +1209,10 @@ def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): self.source_config ) try: - self.powerbi_client = PowerBiAPI(self.source_config) + self.powerbi_client = PowerBiAPI( + config=self.source_config, + reporter=self.reporter, + ) except Exception as e: logger.warning(e) exit( @@ -1288,7 +1233,10 @@ def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() try: - PowerBiAPI(PowerBiDashboardSourceConfig.parse_obj_allow_extras(config_dict)) + PowerBiAPI( + PowerBiDashboardSourceConfig.parse_obj_allow_extras(config_dict), + PowerBiDashboardSourceReport(), + ) test_report.basic_connectivity = CapabilityReport(capable=True) except 
Exception as e: test_report.basic_connectivity = CapabilityReport( @@ -1308,6 +1256,7 @@ def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]: workspace for workspace in all_workspaces if self.source_config.workspace_id_pattern.allowed(workspace.id) + and workspace.type in self.source_config.workspace_type_filter ] logger.info(f"Number of workspaces = {len(all_workspaces)}") @@ -1366,8 +1315,9 @@ def get_workspace_workunit( ) for workunit in workspace_workunits: - # Return workunit to Datahub Ingestion framework + # Return workunit to a Datahub Ingestion framework yield workunit + for dashboard in workspace.dashboards: try: # Fetch PowerBi users for dashboards diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 5106b9817d351..fb0959ac604c4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -41,6 +41,7 @@ class DatasetKey(ContainerKey): class Workspace: id: str name: str + type: str # This is used as a subtype of the Container entity. 
dashboards: List["Dashboard"] reports: List["Report"] datasets: Dict[str, "PowerBIDataset"] @@ -211,10 +212,16 @@ def __hash__(self): return hash(self.__members()) +class ReportType(Enum): + PaginatedReport = "PaginatedReport" + PowerBIReport = "Report" + + @dataclass class Report: id: str name: str + type: ReportType webUrl: Optional[str] embedUrl: str description: str @@ -259,7 +266,7 @@ class Dashboard: tiles: List["Tile"] users: List["User"] tags: List[str] - webUrl: Optional[str] = None + webUrl: Optional[str] def get_urn_part(self): return f"dashboards.{self.id}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index b190cf065b6e3..d89b9662d12ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -1,9 +1,8 @@ import logging -import math from abc import ABC, abstractmethod from datetime import datetime, timedelta from time import sleep -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Iterator, List, Optional, Union import msal import requests @@ -21,6 +20,7 @@ Page, PowerBIDataset, Report, + ReportType, Table, Tile, User, @@ -57,7 +57,8 @@ def is_http_failure(response: Response, message: str) -> bool: class DataResolverBase(ABC): SCOPE: str = "https://analysis.windows.net/powerbi/api/.default" - BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/groups" + MY_ORG_URL = "https://api.powerbi.com/v1.0/myorg" + BASE_URL: str = f"{MY_ORG_URL}/groups" ADMIN_BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/admin" AUTHORITY: str = "https://login.microsoftonline.com/" TOP: int = 1000 @@ -222,49 +223,27 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: tags=[], ) for instance in dashboards_dict - if instance is not None + 
if ( + instance is not None + and Constant.APP_ID + not in instance # As we add dashboards to the App, Power BI starts + # providing duplicate dashboard information, + # where the duplicate includes an AppId, while the original dashboard does not. + ) ] return dashboards - def get_groups(self) -> List[dict]: + def get_groups(self, filter_: Dict) -> List[dict]: group_endpoint = self.get_groups_endpoint() - params: dict = {"$top": self.TOP, "$skip": 0, "$filter": "type eq 'Workspace'"} - - def fetch_page(page_number: int) -> dict: - params["$skip"] = self.TOP * page_number - logger.debug(f"Query parameters = {params}") - response = self._request_session.get( - group_endpoint, - headers=self.get_authorization_header(), - params=params, - ) - response.raise_for_status() - return response.json() - # Hit PowerBi - logger.debug(f"Request to groups endpoint URL={group_endpoint}") - zeroth_page = fetch_page(0) - logger.debug(f"Page 0 = {zeroth_page}") - if zeroth_page.get(Constant.ODATA_COUNT) is None: - logger.warning( - "@odata.count field is not present in response. Unable to fetch workspaces." 
- ) - return [] + output: List[dict] = [] - number_of_items = zeroth_page[Constant.ODATA_COUNT] - number_of_pages = math.ceil(number_of_items / self.TOP) - output: List[dict] = zeroth_page[Constant.VALUE] - for page in range( - 1, number_of_pages - ): # start from 1 as 0th index already fetched - page_response = fetch_page(page) - if len(page_response[Constant.VALUE]) == 0: - break - - logger.debug(f"Page {page} = {zeroth_page}") - - output.extend(page_response[Constant.VALUE]) + for page in self.itr_pages( + endpoint=group_endpoint, + parameter_override=filter_, + ): + output.extend(page) return output @@ -286,13 +265,14 @@ def fetch_reports(): ) response.raise_for_status() response_dict = response.json() - logger.debug(f"Request response = {response_dict}") + logger.debug(f"Report Request response = {response_dict}") return response_dict.get(Constant.VALUE, []) reports: List[Report] = [ Report( id=raw_instance.get(Constant.ID), name=raw_instance.get(Constant.NAME), + type=ReportType[raw_instance.get(Constant.REPORT_TYPE)], webUrl=raw_instance.get(Constant.WEB_URL), embedUrl=raw_instance.get(Constant.EMBED_URL), description=raw_instance.get(Constant.DESCRIPTION, ""), @@ -304,6 +284,11 @@ def fetch_reports(): dataset=workspace.datasets.get(raw_instance.get(Constant.DATASET_ID)), ) for raw_instance in fetch_reports() + if Constant.APP_ID + not in raw_instance # As we add reports to the App, Power BI starts providing + # duplicate report information, + # where the duplicate includes an AppId, + # while the original report does not. 
] return reports @@ -395,6 +380,40 @@ def new_dataset_or_report(tile_instance: Any) -> dict: return tiles + def itr_pages( + self, + endpoint: str, + parameter_override: Dict = {}, + ) -> Iterator[List[Dict]]: + params: dict = { + "$skip": 0, + "$top": self.TOP, + **parameter_override, + } + + page_number: int = 0 + + while True: + params["$skip"] = self.TOP * page_number + response = self._request_session.get( + endpoint, + headers=self.get_authorization_header(), + params=params, + ) + + response.raise_for_status() + + assert ( + Constant.VALUE in response.json() + ), "'value' key is not present in paginated response" + + if not response.json()[Constant.VALUE]: # if it is an empty list then break + break + + yield response.json()[Constant.VALUE] + + page_number += 1 + class RegularAPIResolver(DataResolverBase): # Regular access endpoints @@ -407,6 +426,7 @@ class RegularAPIResolver(DataResolverBase): Constant.REPORT_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports", Constant.PAGE_BY_REPORT: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}/pages", Constant.DATASET_EXECUTE_QUERIES: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}/executeQueries", + Constant.GET_WORKSPACE_APP: "{MY_ORG_URL}/apps/{APP_ID}", } def get_dataset( @@ -676,6 +696,7 @@ class AdminAPIResolver(DataResolverBase): Constant.ENTITY_USER_LIST: "{POWERBI_ADMIN_BASE_URL}/{ENTITY}/{ENTITY_ID}/users", Constant.DATASET_LIST: "{POWERBI_ADMIN_BASE_URL}/groups/{WORKSPACE_ID}/datasets", Constant.WORKSPACE_MODIFIED_LIST: "{POWERBI_ADMIN_BASE_URL}/workspaces/modified", + Constant.GET_WORKSPACE_APP: "{POWERBI_ADMIN_BASE_URL}/apps", } def create_scan_job(self, workspace_ids: List[str]) -> str: @@ -922,7 +943,7 @@ def _get_pages_by_report(self, workspace: Workspace, report_id: str) -> List[Pag def get_modified_workspaces(self, modified_since: str) -> List[str]: """ - Get list of modified workspaces + Get a list of modified workspaces """ modified_workspaces_endpoint = self.API_ENDPOINTS[ 
Constant.WORKSPACE_MODIFIED_LIST @@ -930,7 +951,7 @@ def get_modified_workspaces(self, modified_since: str) -> List[str]: POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL, ) parameters: Dict[str, Any] = { - "excludePersonalWorkspaces": True, + "excludePersonalWorkspaces": False, "excludeInActiveWorkspaces": True, "modifiedSince": modified_since, } diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py index a245d4c2b9a35..25e97b158d48b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py @@ -32,8 +32,13 @@ class PowerBiAPI: - def __init__(self, config: PowerBiDashboardSourceConfig) -> None: + def __init__( + self, + config: PowerBiDashboardSourceConfig, + reporter: PowerBiDashboardSourceReport, + ) -> None: self.__config: PowerBiDashboardSourceConfig = config + self.__reporter = reporter self.__regular_api_resolver = RegularAPIResolver( client_id=self.__config.client_id, @@ -182,17 +187,27 @@ def fill_tags() -> None: fill_ownership() fill_tags() - return reports def get_workspaces(self) -> List[Workspace]: + modified_workspace_ids: List[str] = [] + if self.__config.modified_since: - workspaces = self.get_modified_workspaces() - return workspaces + modified_workspace_ids = self.get_modified_workspaces() groups: List[dict] = [] + filter_: Dict[str, str] = {} try: - groups = self._get_resolver().get_groups() + if modified_workspace_ids: + id_filter: List[str] = [] + + for id_ in modified_workspace_ids: + id_filter.append(f"id eq {id_}") + + filter_["$filter"] = " or ".join(id_filter) + + groups = self._get_resolver().get_groups(filter_=filter_) + except: self.log_http_error(message="Unable to fetch list of workspaces") raise # we want this exception to bubble up @@ -201,6 +216,7 @@ def 
get_workspaces(self) -> List[Workspace]: Workspace( id=workspace[Constant.ID], name=workspace[Constant.NAME], + type=workspace[Constant.TYPE], datasets={}, dashboards=[], reports=[], @@ -213,34 +229,20 @@ def get_workspaces(self) -> List[Workspace]: ] return workspaces - def get_modified_workspaces(self) -> List[Workspace]: - workspaces: List[Workspace] = [] + def get_modified_workspaces(self) -> List[str]: + modified_workspace_ids: List[str] = [] if self.__config.modified_since is None: - return workspaces + return modified_workspace_ids try: modified_workspace_ids = self.__admin_api_resolver.get_modified_workspaces( self.__config.modified_since ) - workspaces = [ - Workspace( - id=workspace_id, - name="", - datasets={}, - dashboards=[], - reports=[], - report_endorsements={}, - dashboard_endorsements={}, - scan_result={}, - independent_datasets=[], - ) - for workspace_id in modified_workspace_ids - ] except: self.log_http_error(message="Unable to fetch list of modified workspaces.") - return workspaces + return modified_workspace_ids def _get_scan_result(self, workspace_ids: List[str]) -> Any: scan_id: Optional[str] = None @@ -389,9 +391,28 @@ def _fill_metadata_from_scan_result( workspaces = [] for workspace_metadata in scan_result["workspaces"]: + if ( + workspace_metadata.get(Constant.STATE) != Constant.ACTIVE + or workspace_metadata.get(Constant.TYPE) + not in self.__config.workspace_type_filter + ): + # if the state is not "Active" then in some state like Not Found, "name" attribute is not present + wrk_identifier: str = ( + workspace_metadata[Constant.NAME] + if workspace_metadata.get(Constant.NAME) + else workspace_metadata.get(Constant.ID) + ) + self.__reporter.info( + title="Skipped Workspace", + message="Workspace was skipped due to the workspace_type_filter", + context=f"workspace={wrk_identifier}", + ) + continue + cur_workspace = Workspace( - id=workspace_metadata["id"], - name=workspace_metadata["name"], + id=workspace_metadata[Constant.ID], + 
name=workspace_metadata[Constant.NAME], + type=workspace_metadata[Constant.TYPE], datasets={}, dashboards=[], reports=[], @@ -403,7 +424,7 @@ def _fill_metadata_from_scan_result( cur_workspace.scan_result = workspace_metadata cur_workspace.datasets = self._get_workspace_datasets(cur_workspace) - # Fetch endorsements tag if it is enabled from configuration + # Fetch endorsement tag if it is enabled from configuration if self.__config.extract_endorsements_to_tags: cur_workspace.dashboard_endorsements = self._get_dashboard_endorsements( cur_workspace.scan_result diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json index fa4bcb8abaa94..5cfa4ec80c643 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -446,8 +442,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -546,8 +541,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -646,8 +640,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -746,8 +739,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -846,8 +838,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1118,11 +1109,12 @@ "entityType": "chart", "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -1135,12 +1127,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -1416,8 +1407,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1491,8 +1481,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1591,8 +1580,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1691,8 +1679,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1799,8 +1786,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1899,8 +1885,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1994,14 +1979,19 @@ "json": [ { "op": "add", - "path": "/dashboardUrl", - "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + "path": "/title", + "value": "SalesMarketing" }, { "op": "add", "path": "/description", "value": "Acryl sales marketing report" }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + }, { "op": "add", "path": "/lastModified", @@ -2015,11 +2005,6 @@ "actor": "urn:li:corpuser:unknown" } } - }, 
- { - "op": "add", - "path": "/title", - "value": "SalesMarketing" } ] }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json index 60b36897ed2e4..66ee60c2eebb3 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -364,8 +361,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -464,8 +460,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -596,8 +591,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -696,8 +690,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -796,8 +789,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -896,8 +888,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1465,8 +1456,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json index b43e4a6c2c1c2..e8be3aa9c0ac7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json @@ -122,15 +122,13 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "materialized": false, - "viewLogic": "dummy", - "viewLanguage": "m_query" + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -140,19 +138,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "typeNames": [ + "Semantic Model" + ] } }, "systemMetadata": { @@ -165,10 +159,10 @@ "entityType": "container", "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:powerbi" + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -181,11 +175,14 @@ "entityType": "container", "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } ] } }, @@ -196,18 +193,15 @@ } }, 
{ - "entityType": "container", - "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } - ] + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -217,13 +211,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -256,8 +256,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -372,8 +371,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -488,8 +486,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -604,8 +601,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -720,8 +716,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -836,8 +831,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", 
- "View" + "Table" ] } }, @@ -952,8 +946,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1041,15 +1034,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -1059,19 +1050,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Semantic Model" + ] } }, "systemMetadata": { @@ -1084,10 +1071,10 @@ "entityType": "container", "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:powerbi" + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -1100,11 +1087,14 @@ "entityType": "container", "entityUrn": 
"urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } ] } }, @@ -1115,18 +1105,15 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } - ] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1136,13 +1123,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -1175,8 +1168,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1291,8 +1283,7 @@ "aspect": { 
"json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1870,15 +1861,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "containerProperties", "aspect": { "json": { - "materialized": false, - "viewLogic": "dummy", - "viewLanguage": "m_query" + "customProperties": { + "platform": "powerbi", + "dataset": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "name": "library-dataset" } }, "systemMetadata": { @@ -1888,19 +1881,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -1910,13 +1897,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -1926,15 +1913,14 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Semantic Model" ] } }, @@ -1944,15 +1930,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLogic": "dummy", "viewLanguage": "m_query" } }, @@ -1964,7 +1966,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -1973,7 +1975,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", + "name": "public issue_history", "description": "Library dataset description", "tags": [] } @@ -1986,7 +1988,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2002,14 +2004,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2021,13 +2022,29 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], 
\"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", "viewLanguage": "m_query" } }, @@ -2039,7 +2056,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -2048,7 
+2065,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -2061,7 +2078,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2077,14 +2094,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2096,13 +2112,29 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - 
Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", "viewLanguage": "m_query" } }, @@ -2114,7 +2146,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -2123,7 +2155,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -2136,7 +2168,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2152,14 +2184,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2169,6 +2200,112 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + 
"aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -2233,8 +2370,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2244,6 +2380,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -2308,8 +2460,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2319,6 +2470,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + 
"json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", @@ -2383,8 +2550,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2394,6 +2560,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -2495,6 +2677,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Page" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", @@ -2619,6 +2819,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Page" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": 
"no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", @@ -2692,6 +2910,60 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/title", + "value": "SalesMarketing" + }, + { + "op": "add", + "path": "/description", + "value": "Acryl sales marketing report" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", @@ -2774,15 +3046,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", "changeType": "UPSERT", - "aspectName": "subTypes", + 
"aspectName": "container", "aspect": { "json": { - "typeNames": [ - "PowerBI Page" - ] + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -2795,10 +3065,411 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "powerbi", + "dataset": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "name": "library-dataset" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:powerbi" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Semantic Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + 
"json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + 
"customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "SNOWFLAKE_TESTTABLE", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = 
Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = 
#\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -2808,17 +3479,30 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + 
"aspect": { + "json": { + "typeNames": [ + "Table" ] } }, @@ -2829,8 +3513,64 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2845,8 +3585,82 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + 
"aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2861,13 +3675,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", "aspect": { "json": { - "username": "User4@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -2877,8 +3693,64 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = 
Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2893,14 +3765,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Page" + "Table" ] } }, @@ -2910,14 +3782,14 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User3@foo.com", +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "container", "aspect": { "json": { - "username": "User3@foo.com" + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" } 
}, "systemMetadata": { @@ -2927,13 +3799,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User3@foo.com", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -2944,7 +3818,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "PATCH", "aspectName": "dashboardInfo", "aspect": { @@ -2952,27 +3826,17 @@ { "op": "add", "path": "/title", - "value": "SalesMarketing" + "value": "Printable SalesMarketing" }, { "op": "add", "path": "/description", "value": "Acryl sales marketing report" }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", - "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", - "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)" - }, { "op": "add", "path": "/dashboardUrl", - "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163" }, { "op": "add", @@ -2997,8 +3861,8 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "UPSERT", "aspectName": "status", 
"aspect": { @@ -3012,6 +3876,78 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/584cf13a-1485-41c2-a514-b1bb66fff163" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PaginatedReport" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", @@ -3036,10 +3972,10 @@ "entityType": "container", "entityUrn": 
"urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [] + "removed": false } }, "systemMetadata": { @@ -3083,15 +4019,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "container", + "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "browsePathsV2", "aspect": { "json": { - "paths": [ - "/powerbi/second-demo-workspace" - ] + "path": [] } }, "systemMetadata": { @@ -3101,13 +4035,29 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User3@foo.com", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { "json": { - "removed": false + "username": "User3@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User4@foo.com" } }, "systemMetadata": { @@ -3120,14 +4070,11 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "browsePaths", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", - "urn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" - } + "paths": [ + "/powerbi/second-demo-workspace" ] } }, @@ -3195,11 +4142,10 @@ "entityType": "dashboard", "entityUrn": 
"urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "status", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" + "removed": false } }, "systemMetadata": { @@ -3212,10 +4158,11 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dashboardKey", "aspect": { "json": { - "container": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -3253,5 +4200,106 @@ "runId": "powerbi-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", + "urn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User3@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index c5414444cc35b..665f5d5a3bb41 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - 
"PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json index e1ddbfb901bad..26476e61a0bd7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -158,8 +157,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -253,8 +251,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -348,8 +345,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -443,8 +439,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -538,8 +533,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -633,8 +627,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -728,8 +721,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -803,8 +795,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json 
b/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json index d204d426a38d3..0b822ad19b425 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json @@ -13,7 +13,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -49,7 +51,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -60,14 +63,14 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 6f899a7fa11b7..83f8f881835b7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 
+433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json index efbd9abfdb911..93a2c533d21ca 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 
9a09cb4fec64d..eda831722cc91 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -413,8 +409,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -521,8 +516,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -621,8 +615,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -721,8 +714,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -821,8 +813,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index a4eb670a4b7f9..6f502cdfc0f5b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -167,8 +167,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -186,8 +185,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -279,8 +277,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -336,8 +333,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - 
"View" + "Table" ] } }, @@ -409,8 +405,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -446,8 +441,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -668,8 +662,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -744,8 +737,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -819,8 +811,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json index 66e87952bf141..4393a87d1f570 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json @@ -182,33 +182,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", - "type": "TRANSFORMED" - } + "Table" ] } }, @@ -354,12 +328,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", 
"changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -372,13 +355,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -391,18 +371,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -600,12 +573,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -618,13 +600,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", 
"changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -637,18 +616,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -819,6 +791,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", @@ -1395,7 +1392,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset" + "Semantic Model" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json new file mode 100644 index 0000000000000..f8c0fdc17c880 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json @@ -0,0 +1,329 @@ +[ +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "powerbi", + "workspace": "Jane Smith Workspace" + }, + "name": "Jane Smith Workspace" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:powerbi" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PersonalGroup" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Jane Smith Workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "Jane Smith Workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User2@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": 
"UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "urn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json index ea1ee0df4b105..6da5f5781112e 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { 
"typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json index 580a8d1a1db11..b8963a0d7782d 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json @@ -48,8 +48,8 @@ "json": { "timestampMillis": 1645599600000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 542300, "columnCount": 4, @@ -115,8 +115,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 094869bfd24f1..f6248db9008af 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } 
}, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -910,11 +901,12 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -927,12 +919,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -1213,8 +1204,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1288,8 +1278,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1363,8 +1352,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1438,8 +1426,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1513,8 +1500,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1588,8 +1574,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1663,8 +1648,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ 
-1904,11 +1888,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "PowerBI Page" ] } }, @@ -1922,11 +1906,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "PowerBI Page" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -2129,6 +2113,657 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "SNOWFLAKE_TESTTABLE", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + 
"runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = 
Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + 
] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": 
"powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + 
"lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + 
"lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/title", + "value": "Printable SalesMarketing" + }, + { + "op": "add", + "path": "/description", + "value": "Acryl sales marketing report" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } 
+ ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/584cf13a-1485-41c2-a514-b1bb66fff163" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PaginatedReport" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index 
dcaa518a3c323..e327ca695beb7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json index bc5e844f679c7..90c8ee5d0379e 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { 
"typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +434,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -546,8 +541,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -646,8 +640,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -746,8 +739,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -846,8 +838,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index 91c6082524389..b636c12cfda06 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -58,21 +58,28 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "status_code": 200, "json": admin_datasets_response, }, - "https://api.powerbi.com/v1.0/myorg/admin/groups": { + "https://api.powerbi.com/v1.0/myorg/admin/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "demo-workspace", "type": "Workspace", + "state": "Active", } ], }, }, + "https://api.powerbi.com/v1.0/myorg/admin/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/admin/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -220,6 +227,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None { "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", + "type": "Workspace", "state": "Active", "datasets": 
[ { @@ -391,6 +399,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", } @@ -422,6 +431,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", @@ -436,6 +446,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 23b23ecada0d4..43f77b059e41f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -19,6 +19,7 @@ from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import ( Page, Report, + ReportType, Workspace, ) from tests.test_helpers import mce_helpers, test_connection_helpers @@ -70,6 +71,9 @@ 
def scan_init_response(request, context): "64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": { "id": "a674efd1-603c-4129-8d82-03cf2be05aff" }, + "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C": { + "id": "4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3" + }, } return w_id_vs_response[workspace_id] @@ -78,11 +82,10 @@ def scan_init_response(request, context): def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None: override_data = override_data or {} api_vs_response = { - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", @@ -105,6 +108,13 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -228,6 +238,11 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ] }, }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { + "method": "GET", + "status_code": 200, + "json": {"value": []}, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": { "method": "GET", "status_code": 200, @@ -318,6 +333,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", "state": "Active", + "type": "Workspace", "datasets": [ { "id": "05169CD2-E713-41E6-9600-1D8066D95445", @@ -473,6 +489,7 @@ def register_mock_api(request_mock: Any, 
override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PaginatedReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", } @@ -489,6 +506,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "id": "64ED5CAD-7C22-4684-8180-826122881108", "name": "second-demo-workspace", + "type": "Workspace", "state": "Active", "datasets": [ { @@ -515,9 +533,17 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", - } + }, + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "SalesMarketing", + "description": "Acryl sales marketing report", + }, ], }, ] @@ -536,11 +562,21 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - } + }, + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "Printable SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", + "embedUrl": 
"https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", + }, ] }, }, @@ -550,12 +586,26 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163": { + "method": "GET", + "status_code": 200, + "json": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "Printable SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", + "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": { "method": "GET", "status_code": 200, @@ -574,6 +624,11 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ] }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163/pages": { + "method": "GET", + "status_code": 400, # Pages API is not supported for PaginatedReport + "text": '{"error":{"code":"InvalidRequest","message":"Request 
is currently not supported for RDL reports"}}', + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": { "method": "GET", "status_code": 200, @@ -612,7 +667,8 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - request_mock.register_uri( api_vs_response[url]["method"], url, - json=api_vs_response[url]["json"], + json=api_vs_response[url].get("json"), + text=api_vs_response[url].get("text"), status_code=api_vs_response[url]["status_code"], ) @@ -683,6 +739,131 @@ def test_powerbi_ingest( ) +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +@pytest.mark.integration +def test_powerbi_workspace_type_filter( + mock_msal: MagicMock, + pytestconfig: pytest.Config, + tmp_path: str, + mock_time: datetime.datetime, + requests_mock: Any, +) -> None: + enable_logging() + + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + register_mock_api( + request_mock=requests_mock, + override_data={ + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "isReadOnly": True, + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + "state": "Active", + }, + { + "id": "C6B5DBBC-7580-406C-A6BE-72628C28801C", + "isReadOnly": True, + "name": "Sales", + "type": "Workspace", + "state": "Active", + }, + ], + }, + }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + 
"state": "Active", + "datasets": [], + }, + ] + }, + }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": True, + "displayName": "test_dashboard", + "description": "Description of test dashboard", + "embedUrl": "https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1", + } + ] + }, + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED", + }, + }, + }, + ) + + default_config: dict = default_source_config() + + del default_config["workspace_id"] + del default_config["workspace_id_pattern"] + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_config, + "extract_workspaces_to_containers": True, + "workspace_type_filter": [ + "PersonalGroup", + ], + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_personal_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/powerbi_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) @pytest.mark.integration @@ -1439,6 +1620,7 @@ def validate_pipeline(pipeline: Pipeline) -> None: mock_workspace: Workspace = Workspace( id="64ED5CAD-7C10-4684-8180-826122881108", name="demo-workspace", + type="Workspace", datasets={}, dashboards=[], reports=[], @@ -1485,6 +1667,7 @@ def validate_pipeline(pipeline: Pipeline) -> None: Report( id=report[Constant.ID], name=report[Constant.NAME], + type=ReportType.PowerBIReport, webUrl="", 
embedUrl="", description=report[Constant.DESCRIPTION], @@ -1538,6 +1721,7 @@ def test_reports_with_failed_page_request( { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", @@ -1546,6 +1730,7 @@ def test_reports_with_failed_page_request( { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", + "reportType": "PaginatedReport", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", @@ -1561,6 +1746,7 @@ def test_reports_with_failed_page_request( "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108", @@ -1572,6 +1758,7 @@ def test_reports_with_failed_page_request( "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", + "reportType": "PowerBIReport", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", @@ -1647,11 +1834,10 @@ def test_independent_datasets_extraction( register_mock_api( request_mock=requests_mock, override_data={ - "https://api.powerbi.com/v1.0/myorg/groups": { + 
"https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", @@ -1662,6 +1848,13 @@ def test_independent_datasets_extraction( ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": { "method": "GET", "status_code": 200, @@ -1670,6 +1863,7 @@ def test_independent_datasets_extraction( { "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", + "type": "Workspace", "state": "Active", "datasets": [ { diff --git a/metadata-ingestion/tests/integration/powerbi/test_profiling.py b/metadata-ingestion/tests/integration/powerbi/test_profiling.py index 7955386de8940..4b48bed003b1e 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_profiling.py +++ b/metadata-ingestion/tests/integration/powerbi/test_profiling.py @@ -112,21 +112,28 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "status_code": 200, "json": admin_datasets_response, }, - "https://api.powerbi.com/v1.0/myorg/groups?%24top=1000&%24skip=0&%24filter=type+eq+%27Workspace%27": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "demo-workspace", "type": "Workspace", + "state": "Active", } ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -176,6 +183,7 @@ def register_mock_admin_api(request_mock: Any, 
override_data: dict = {}) -> None "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", "state": "Active", + "type": "Workspace", "datasets": [ { "id": "05169CD2-E713-41E6-9600-1D8066D95445", diff --git a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py index 077b48ca177b5..84f7a87ce5d2d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py +++ b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py @@ -23,27 +23,35 @@ def register_mock_api_state1(request_mock): "status_code": 403, "json": {}, }, - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 1, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "Workspace 1", "type": "Workspace", + "state": "Active", }, { "id": "44444444-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "Multi Workspace", "type": "Workspace", + "state": "Active", }, ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -114,7 +122,7 @@ def register_mock_api_state2(request_mock): "status_code": 403, "json": {}, }, - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { @@ -135,6 +143,13 @@ def register_mock_api_state2(request_mock): ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, From 45b0693b8ebe3cd3f9629a7b7f9f359b5566d213 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 11 Oct 2024 08:55:40 -0700 Subject: [PATCH 03/50] fix(ingest): gracefully handle missing system metadata in client (#11592) --- metadata-ingestion/src/datahub/ingestion/graph/client.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index b9b0ed556e66c..e8fae6254ae88 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -559,8 +559,10 @@ def get_entity_as_mcps( post_json_obj = post_json_transform(aspect_json) aspect_value = aspect_type.from_obj(post_json_obj["value"]) - system_metadata_raw = post_json_obj["systemMetadata"] - system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) + system_metadata_raw = post_json_obj.get("systemMetadata") + system_metadata = None + if system_metadata_raw: + system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) mcpw = MetadataChangeProposalWrapper( entityUrn=entity_urn, @@ -590,7 +592,7 @@ def get_entity_semityped( not be present in the dictionary. The entity's key aspect will always be present. 
""" - mcps = self.get_entity_as_mcps(entity_urn, aspects) + mcps = self.get_entity_as_mcps(entity_urn, aspects=aspects) result: AspectBag = {} for mcp in mcps: From 089f447d9587e40121587b89edd33c585fd408ed Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Fri, 11 Oct 2024 09:20:14 -0700 Subject: [PATCH 04/50] docs(assertions): add example of fetching associated dataset to assertion docs (#11566) --- docs/api/tutorials/custom-assertions.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/api/tutorials/custom-assertions.md b/docs/api/tutorials/custom-assertions.md index 6544efb8809c2..47975c5739464 100644 --- a/docs/api/tutorials/custom-assertions.md +++ b/docs/api/tutorials/custom-assertions.md @@ -265,7 +265,7 @@ query getAssertion { customType # Will be your custom type. description lastUpdated { - time + time actor } customAssertion { @@ -282,6 +282,18 @@ query getAssertion { } } } + # Fetch what entities have the assertion attached to it + relationships(input: { + types: ["Asserts"] + direction: OUTGOING + }) { + total + relationships { + entity { + urn + } + } + } } } ``` From 14c79389f580e8e4641c8ca60f4ee0e68f2897e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20L=C3=BCdin?= <13187726+Masterchen09@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:40:05 +0200 Subject: [PATCH 05/50] fix(ingest/sac): handle descriptions which are None correctly (#11572) --- .../src/datahub/ingestion/source/sac/sac.py | 35 +++++++++++++++---- .../ingestion/source/sac/sac_common.py | 6 ++-- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py index 8309c469f67c5..de0904107b9bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py @@ -329,7 +329,9 @@ def get_resource_workunits( entityUrn=dashboard_urn, aspect=DashboardInfoClass( 
title=resource.name, - description=resource.description, + description=resource.description + if resource.description is not None + else "", lastModified=ChangeAuditStampsClass( created=AuditStampClass( time=round(resource.created_time.timestamp() * 1000), @@ -559,7 +561,14 @@ def get_sac_connection( retries = 3 backoff_factor = 10 - status_forcelist = (500,) + + # The Resources and Data Import Service APIs of SAP Analytics Cloud can be somewhat unstable, occasionally + # returning HTTP errors for some requests, even though the APIs are generally operational. Therefore, we must + # retry these requests to increase the likelihood that the ingestion is successful. For the same reason we + # should also retry requests that receive a 401 HTTP status; however, this status also legitimately indicates + # that the provided OAuth credentials are invalid or that the OAuth client does not have the correct + # permissions assigned, therefore requests that receive a 401 HTTP status must not be retried. 
+ status_forcelist = (400, 500, 503) retry = Retry( total=retries, @@ -611,7 +620,9 @@ def get_resources(self) -> Iterable[Resource]: entity: pyodata.v2.service.EntityProxy for entity in entities: resource_id: str = entity.resourceId - name: str = entity.name.strip() + name: str = ( + entity.name.strip() if entity.name is not None else entity.resourceId + ) if not self.config.resource_id_pattern.allowed( resource_id @@ -655,8 +666,12 @@ def get_resources(self) -> Iterable[Resource]: ResourceModel( namespace=namespace, model_id=model_id, - name=nav_entity.name.strip(), - description=nav_entity.description.strip(), + name=nav_entity.name.strip() + if nav_entity.name is not None + else f"{namespace}:{model_id}", + description=nav_entity.description.strip() + if nav_entity.description is not None + else None, system_type=nav_entity.systemType, # BW or HANA connection_id=nav_entity.connectionId, external_id=nav_entity.externalId, # query:[][][query] or view:[schema][schema.namespace][view] @@ -678,7 +693,9 @@ def get_resources(self) -> Iterable[Resource]: resource_subtype=entity.resourceSubtype, story_id=entity.storyId, name=name, - description=entity.description.strip(), + description=entity.description.strip() + if entity.description is not None + else None, created_time=entity.createdTime, created_by=created_by, modified_time=entity.modifiedTime, @@ -715,7 +732,11 @@ def get_import_data_model_columns( columns.append( ImportDataModelColumn( name=column["columnName"].strip(), - description=column["descriptionName"].strip(), + description=( + column["descriptionName"].strip() + if column.get("descriptionName") is not None + else None + ), property_type=column["propertyType"], data_type=column["columnDataType"], max_length=column.get("maxLength"), diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py index 457fda1e06181..2c02b444cea1c 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py @@ -8,7 +8,7 @@ class ResourceModel: namespace: str model_id: str name: str - description: str + description: Optional[str] system_type: Optional[str] connection_id: Optional[str] external_id: Optional[str] @@ -22,7 +22,7 @@ class Resource: resource_subtype: str story_id: str name: str - description: str + description: Optional[str] created_time: datetime created_by: Optional[str] modified_time: datetime @@ -36,7 +36,7 @@ class Resource: @dataclass(frozen=True) class ImportDataModelColumn: name: str - description: str + description: Optional[str] property_type: str data_type: str max_length: Optional[int] From ebcce1f87bddf2bf8739b7060a046ac0c80e1fe2 Mon Sep 17 00:00:00 2001 From: skrydal Date: Fri, 11 Oct 2024 18:48:25 +0200 Subject: [PATCH 06/50] fix(ingest/iceberg): Iceberg table name (#11599) --- .../src/datahub/ingestion/source/iceberg/iceberg.py | 1 + metadata-ingestion/tests/integration/iceberg/docker-compose.yml | 2 -- .../integration/iceberg/iceberg_deleted_table_mces_golden.json | 1 + .../tests/integration/iceberg/iceberg_ingest_mces_golden.json | 1 + .../tests/integration/iceberg/iceberg_profile_mces_golden.json | 1 + 5 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index b5caa83b2ff37..d8c6c03ce81e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -182,6 +182,7 @@ def _create_iceberg_workunit( custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id) custom_properties["manifest-list"] = table.current_snapshot().manifest_list dataset_properties = DatasetPropertiesClass( + name=table.name()[-1], tags=[], 
description=table.metadata.properties.get("comment", None), customProperties=custom_properties, diff --git a/metadata-ingestion/tests/integration/iceberg/docker-compose.yml b/metadata-ingestion/tests/integration/iceberg/docker-compose.yml index 8baae6e8ab636..8a05ac7481fe2 100644 --- a/metadata-ingestion/tests/integration/iceberg/docker-compose.yml +++ b/metadata-ingestion/tests/integration/iceberg/docker-compose.yml @@ -1,5 +1,3 @@ -version: "3" - services: spark-iceberg: image: tabulario/spark-iceberg:3.3.2_1.3.0 diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json index 3321fcac0d73e..4b2afb29ddda8 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json +++ b/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "another_taxis", "customProperties": { "owner": "root", "created-at": "2024-06-27T17:29:32.492204247Z", diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json index b017b6cd31520..477f719ef9317 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json +++ b/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "taxis", "customProperties": { "owner": "root", "created-at": "2024-05-22T14:08:04.001538500Z", diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json index 453a79494fa25..6d2ca013d81d0 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json +++ 
b/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "taxis", "customProperties": { "owner": "root", "created-at": "2024-05-22T14:10:22.926080700Z", From 4b66757a62a9f2f6d9a713948bfa079f760ccc5d Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:52:52 -0500 Subject: [PATCH 07/50] chore(frontend): force frontend protobuf version (#11601) --- build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/build.gradle b/build.gradle index 79a4ca9384d28..67968ce3ee290 100644 --- a/build.gradle +++ b/build.gradle @@ -398,6 +398,7 @@ subprojects { implementation("com.fasterxml.jackson.core:jackson-databind:$jacksonVersion") implementation("com.fasterxml.jackson.core:jackson-dataformat-cbor:$jacksonVersion") implementation(externalDependency.commonsIo) + implementation(externalDependency.protobuf) } } From f13dae1cd2f6659564755bf2bb5a4ddb70ab1a87 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Fri, 11 Oct 2024 19:46:01 +0200 Subject: [PATCH 08/50] fix(airflow): add dag AllowDenyPattern config (#11472) Co-authored-by: Harshal Sheth --- docs/lineage/airflow.md | 3 +- .../src/datahub_airflow_plugin/_config.py | 12 ++++++- .../datahub_listener.py | 18 ++++++++-- .../tests/integration/dags/dag_to_skip.py | 34 ++++++++++++++++++ .../tests/integration/test_plugin.py | 36 +++++++++++-------- 5 files changed, 85 insertions(+), 18 deletions(-) create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index aca6d30619ea8..35f2ff862e695 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -132,7 +132,7 @@ conn_id = datahub_rest_default # or datahub_kafka_default ``` | Name | Default value | Description | -| -------------------------- 
| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +|----------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | enabled | true | If the plugin should be enabled. | | conn_id | datahub_rest_default | The name of the datahub connection you set in step 1. | | cluster | prod | name of the airflow cluster | @@ -145,6 +145,7 @@ conn_id = datahub_rest_default # or datahub_kafka_default | datajob_url_link | taskinstance | If taskinstance, the datajob url will be taskinstance link on airflow. It can also be grid. | | | | graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. | +| dag_filter_str | { "allow": [".*"] } | AllowDenyPattern value in form of JSON string to filter the DAGs from running. 
| #### Validate that the plugin is working diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index 8deba22a107ce..c4964712cf9f7 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -3,7 +3,8 @@ import datahub.emitter.mce_builder as builder from airflow.configuration import conf -from datahub.configuration.common import ConfigModel +from datahub.configuration.common import AllowDenyPattern, ConfigModel +from pydantic.fields import Field if TYPE_CHECKING: from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook @@ -56,6 +57,11 @@ class DatahubLineageConfig(ConfigModel): # Makes extraction of jinja-templated fields more accurate. render_templates: bool = True + dag_filter_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="regex patterns for DAGs to ingest", + ) + log_level: Optional[str] = None debug_emitter: bool = False @@ -93,6 +99,9 @@ def get_lineage_config() -> DatahubLineageConfig: datajob_url_link = conf.get( "datahub", "datajob_url_link", fallback=DatajobUrl.TASKINSTANCE.value ) + dag_filter_pattern = AllowDenyPattern.parse_raw( + conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}') + ) return DatahubLineageConfig( enabled=enabled, @@ -109,4 +118,5 @@ def get_lineage_config() -> DatahubLineageConfig: disable_openlineage_plugin=disable_openlineage_plugin, datajob_url_link=datajob_url_link, render_templates=render_templates, + dag_filter_pattern=dag_filter_pattern, ) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index b818b76de9f7f..d1c7e996dd03d 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -383,9 +383,15 @@ def on_task_instance_running( return logger.debug( - f"DataHub listener got notification about task instance start for {task_instance.task_id}" + f"DataHub listener got notification about task instance start for {task_instance.task_id} of dag {task_instance.dag_run.dag_id}" ) + if not self.config.dag_filter_pattern.allowed(task_instance.dag_run.dag_id): + logger.debug( + f"DAG {task_instance.dag_run.dag_id} is not allowed by the pattern" + ) + return + if self.config.render_templates: task_instance = _render_templates(task_instance) @@ -492,6 +498,10 @@ def on_task_instance_finish( dag: "DAG" = task.dag # type: ignore[assignment] + if not self.config.dag_filter_pattern.allowed(dag.dag_id): + logger.debug(f"DAG {dag.dag_id} is not allowed by the pattern") + return + datajob = AirflowGenerator.generate_datajob( cluster=self.config.cluster, task=task, @@ -689,8 +699,12 @@ def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None: f"DataHub listener got notification about dag run start for {dag_run.dag_id}" ) - self.on_dag_start(dag_run) + assert dag_run.dag_id + if not self.config.dag_filter_pattern.allowed(dag_run.dag_id): + logger.debug(f"DAG {dag_run.dag_id} is not allowed by the pattern") + return + self.on_dag_start(dag_run) self.emitter.flush() # TODO: Add hooks for on_dag_run_success, on_dag_run_failed -> call AirflowGenerator.complete_dataflow diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py new file mode 100644 index 0000000000000..a805a2219d142 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py @@ -0,0 +1,34 @@ +from datetime import datetime + +from airflow import DAG +from 
airflow.operators.bash import BashOperator + +from datahub_airflow_plugin.entities import Dataset, Urn + +with DAG( + "dag_to_skip", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + task1 = BashOperator( + task_id="dag_to_skip_task_1", + dag=dag, + bash_command="echo 'dag_to_skip_task_1'", + inlets=[ + Dataset(platform="snowflake", name="mydb.schema.tableA"), + Urn( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ), + Urn("urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)"), + ], + outlets=[Dataset("snowflake", "mydb.schema.tableD")], + ) + + task2 = BashOperator( + task_id="dag_to_skip_task_2", + dag=dag, + bash_command="echo 'dag_to_skip_task_2'", + ) + + task1 >> task2 diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 37cd3b792d535..44efd94f834b1 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -33,6 +33,8 @@ DAGS_FOLDER = pathlib.Path(__file__).parent / "dags" GOLDENS_FOLDER = pathlib.Path(__file__).parent / "goldens" +DAG_TO_SKIP_INGESTION = "dag_to_skip" + @dataclasses.dataclass class AirflowInstance: @@ -140,6 +142,7 @@ def _run_airflow( # Configure the datahub plugin and have it write the MCPs to a file. 
"AIRFLOW__CORE__LAZY_LOAD_PLUGINS": "False" if is_v1 else "True", "AIRFLOW__DATAHUB__CONN_ID": datahub_connection_name, + "AIRFLOW__DATAHUB__DAG_FILTER_STR": f'{{ "deny": ["{DAG_TO_SKIP_INGESTION}"] }}', f"AIRFLOW_CONN_{datahub_connection_name.upper()}": Connection( conn_id="datahub_file_default", conn_type="datahub-file", @@ -276,6 +279,7 @@ class DagTestCase: test_cases = [ DagTestCase("simple_dag"), DagTestCase("basic_iolets"), + DagTestCase("dag_to_skip", v2_only=True), DagTestCase("snowflake_operator", success=False, v2_only=True), DagTestCase("sqlite_operator", v2_only=True), DagTestCase("custom_operator_dag", v2_only=True), @@ -373,20 +377,24 @@ def test_airflow_plugin( print("Sleeping for a few seconds to let the plugin finish...") time.sleep(10) - _sanitize_output_file(airflow_instance.metadata_file) - - check_golden_file( - pytestconfig=pytestconfig, - output_path=airflow_instance.metadata_file, - golden_path=golden_path, - ignore_paths=[ - # TODO: If we switched to Git urls, maybe we could get this to work consistently. - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['datahub_sql_parser_error'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['log_url'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['externalUrl'\]", - ], - ) + if dag_id == DAG_TO_SKIP_INGESTION: + # Verify that no MCPs were generated. + assert not os.path.exists(airflow_instance.metadata_file) + else: + _sanitize_output_file(airflow_instance.metadata_file) + + check_golden_file( + pytestconfig=pytestconfig, + output_path=airflow_instance.metadata_file, + golden_path=golden_path, + ignore_paths=[ + # TODO: If we switched to Git urls, maybe we could get this to work consistently. 
+ r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['datahub_sql_parser_error'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['log_url'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['externalUrl'\]", + ], + ) def _sanitize_output_file(output_path: pathlib.Path) -> None: From 36fc0c4e23ec036af01f07017e0380677c4bef9b Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Fri, 11 Oct 2024 13:23:54 -0700 Subject: [PATCH 09/50] =?UTF-8?q?docs(structured-properties):=20example=20?= =?UTF-8?q?to=20read=20structured=20properties=20fr=E2=80=A6=20(#11603)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/api/tutorials/structured-properties.md | 46 ++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index 00e992f2bd0bb..9b18aa922290b 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -532,6 +532,50 @@ Or you can run the following command to view the properties associated with the datahub dataset get --urn {urn} ``` +## Read Structured Properties From a Dataset + +For reading all structured properties from a dataset: + + + + +```graphql +query getDataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.ecommerce.customer,PROD)") { + structuredProperties { + properties { + structuredProperty { + urn + type + definition { + displayName + description + allowedValues { + description + } + } + } + values { + ... on StringValue { + stringValue + } + ... 
on NumberValue { + numberValue + } + } + valueEntities { + urn + type + } + } + } + } +} +``` + + + + ## Remove Structured Properties From a Dataset For removing a structured property or list of structured properties from a dataset: @@ -1733,4 +1777,4 @@ Example Response: ``` - \ No newline at end of file + From b5cf729c9be29d44e6e499ba3a6971ed23efc0f6 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:29:58 -0500 Subject: [PATCH 10/50] fix(bootstrap): fix early bootstrap mcps (#11605) --- .../configuration/src/main/resources/bootstrap_mcps.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml index b1612f95f9219..10ae176b2c31e 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml @@ -14,18 +14,26 @@ bootstrap: - name: data-platforms version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/data-platforms.yaml" - name: data-types version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/data-types.yaml" - name: ownership-types version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/ownership-types.yaml" - name: roles version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/roles.yaml" # Ingestion Recipes From 413331933a9fd7f56931faad50587ebc2b0de4cb Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 11 Oct 2024 19:08:25 -0500 Subject: [PATCH 11/50] fix(spark-lineage-legacy): fix check jar script (#11608) --- .../java/spark-lineage-legacy/scripts/check_jar.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh 
b/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh index 81d6a541d1c2a..854c4227d08d9 100755 --- a/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh +++ b/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh @@ -40,7 +40,8 @@ jar -tvf $jarFile |\ grep -v "rootdoc.txt" |\ grep -v "VersionInfo.java" |\ grep -v "mime.types" |\ - grep -v "com/ibm/.*" + grep -v "com/ibm/.*" |\ + grep -v "google/" if [ $? -ne 0 ]; then From 38ac1007d6a8e1e22291b9f0a1491734dfd44812 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Mon, 14 Oct 2024 01:25:19 -0700 Subject: [PATCH 12/50] fix(sdk): platform resource api for non existent resources (#11610) --- .../entities/platformresource/platform_resource.py | 7 +++++++ .../platform_resources/test_platform_resource.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py b/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py index 2b730ccb86f51..1556a67a9e555 100644 --- a/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py +++ b/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py @@ -186,10 +186,17 @@ def to_datahub(self, graph_client: DataHubGraph) -> None: def from_datahub( cls, graph_client: DataHubGraph, key: Union[PlatformResourceKey, str] ) -> Optional["PlatformResource"]: + """ + Fetches a PlatformResource from the graph given a key. + Key can be either a PlatformResourceKey object or an urn string. + Returns None if the resource is not found. 
+ """ if isinstance(key, PlatformResourceKey): urn = PlatformResourceUrn(id=key.id) else: urn = PlatformResourceUrn.from_string(key) + if not graph_client.exists(str(urn)): + return None platform_resource = graph_client.get_entity_semityped(str(urn)) return cls( id=urn.id, diff --git a/smoke-test/tests/platform_resources/test_platform_resource.py b/smoke-test/tests/platform_resources/test_platform_resource.py index 09d2503179572..7c53f72d843c9 100644 --- a/smoke-test/tests/platform_resources/test_platform_resource.py +++ b/smoke-test/tests/platform_resources/test_platform_resource.py @@ -99,3 +99,16 @@ def test_platform_resource_search(graph_client, test_id, cleanup_resources): ] assert len(search_results) == 1 assert search_results[0] == platform_resource + + +def test_platform_resource_non_existent(graph_client, test_id): + key = PlatformResourceKey( + platform=f"test_platform_{test_id}", + resource_type=f"test_resource_type_{test_id}", + primary_key=f"test_primary_key_{test_id}", + ) + platform_resource = PlatformResource.from_datahub( + key=key, + graph_client=graph_client, + ) + assert platform_resource is None From 3387110b411d91c7a08baaab76e29caa3bcb3a28 Mon Sep 17 00:00:00 2001 From: skrydal Date: Mon, 14 Oct 2024 11:35:34 +0200 Subject: [PATCH 13/50] fix(ingestion/redshift): Fix for Redshift COPY-based lineage (#11552) --- .../ingestion/source/redshift/lineage_v2.py | 13 ++- .../ingestion/source/redshift/query.py | 89 ++++++------------- .../sql_parsing/sql_parsing_aggregator.py | 4 +- 3 files changed, 41 insertions(+), 65 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py index 4b7f710beed08..4df64c80bad8a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py @@ -334,19 +334,26 @@ def _process_view_lineage(self, lineage_row: 
LineageRow) -> None: ) def _process_copy_command(self, lineage_row: LineageRow) -> None: - source = self._lineage_v1._get_sources( + logger.debug(f"Processing COPY command for lineage row: {lineage_row}") + sources = self._lineage_v1._get_sources( lineage_type=LineageCollectorType.COPY, db_name=self.database, source_schema=None, source_table=None, ddl=None, filename=lineage_row.filename, - )[0] + ) + logger.debug(f"Recognized sources: {sources}") + source = sources[0] if not source: + logger.debug("Ignoring command since couldn't recognize proper source") return s3_urn = source[0].urn - + logger.debug(f"Recognized s3 dataset urn: {s3_urn}") if not lineage_row.target_schema or not lineage_row.target_table: + logger.debug( + f"Didn't find target schema (found: {lineage_row.target_schema}) or target table (found: {lineage_row.target_table})" + ) return target = self._make_filtered_target(lineage_row) if not target: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index affbcd00b5107..39370b93b561c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -283,6 +283,34 @@ def alter_table_rename_query( AND SYS.query_text ILIKE '%alter table % rename to %' """ + @staticmethod + def list_copy_commands_sql( + db_name: str, start_time: datetime, end_time: datetime + ) -> str: + return """ + select + distinct + "schema" as target_schema, + "table" as target_table, + c.file_name as filename + from + SYS_QUERY_DETAIL as si + join SYS_LOAD_DETAIL as c on + si.query_id = c.query_id + join SVV_TABLE_INFO sti on + sti.table_id = si.table_id + where + database = '{db_name}' + and si.start_time >= '{start_time}' + and si.start_time < '{end_time}' + order by target_schema, target_table, si.start_time asc + """.format( + # We need the original database name for filtering + db_name=db_name, + 
start_time=start_time.strftime(redshift_datetime_format), + end_time=end_time.strftime(redshift_datetime_format), + ) + @staticmethod def additional_table_metadata_query() -> str: raise NotImplementedError @@ -317,12 +345,6 @@ def list_insert_create_queries_sql( ) -> str: raise NotImplementedError - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) -> str: - raise NotImplementedError - class RedshiftProvisionedQuery(RedshiftCommonQuery): @staticmethod @@ -536,34 +558,6 @@ def list_insert_create_queries_sql( end_time=end_time.strftime(redshift_datetime_format), ) - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) -> str: - return """ - select - distinct - "schema" as target_schema, - "table" as target_table, - filename - from - stl_insert as si - join stl_load_commits as c on - si.query = c.query - join SVV_TABLE_INFO sti on - sti.table_id = tbl - where - database = '{db_name}' - and si.starttime >= '{start_time}' - and si.starttime < '{end_time}' - order by target_schema, target_table, starttime asc - """.format( - # We need the original database name for filtering - db_name=db_name, - start_time=start_time.strftime(redshift_datetime_format), - end_time=end_time.strftime(redshift_datetime_format), - ) - @staticmethod def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: start_time_str: str = start_time.strftime(redshift_datetime_format) @@ -941,33 +935,6 @@ def list_insert_create_queries_sql( # when loading from s3 using prefix with a single file it produces 2 lines (for file and just directory) - also # behaves like this when run in the old way - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) -> str: - return """ - select - distinct - "schema" as target_schema, - "table" as target_table, - c.file_name - from - SYS_QUERY_DETAIL as si - join SYS_LOAD_DETAIL as c on - si.query_id 
= c.query_id - join SVV_TABLE_INFO sti on - sti.table_id = si.table_id - where - database = '{db_name}' - and si.start_time >= '{start_time}' - and si.start_time < '{end_time}' - order by target_schema, target_table, si.start_time asc - """.format( - # We need the original database name for filtering - db_name=db_name, - start_time=start_time.strftime(redshift_datetime_format), - end_time=end_time.strftime(redshift_datetime_format), - ) # handles "create table IF ..." statements wrong probably - "create command" field contains only "create table if" in such cases # also similar happens if for example table name contains special characters quoted with " i.e. "test-table1" diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 52934f9f72a70..5f2709fe42660 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -613,7 +613,9 @@ def add_known_lineage_mapping( upstream_urn: The upstream dataset URN. downstream_urn: The downstream dataset URN. """ - + logger.debug( + f"Adding lineage to the map, downstream: {downstream_urn}, upstream: {upstream_urn}" + ) self.report.num_known_mapping_lineage += 1 # We generate a fake "query" object to hold the lineage. 
From b74ba11d937ef4b902f320e1bdb39a7ece35ffc4 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 14 Oct 2024 18:21:05 +0530 Subject: [PATCH 14/50] fix(ingest/delta-lake): skip file count if require_files is false (#11611) --- .../src/datahub/ingestion/source/delta_lake/source.py | 5 ++--- metadata-ingestion/tests/unit/test_mlflow_source.py | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 6a52d8fdd8905..98133ca69011e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -223,15 +223,14 @@ def ingest_table( ) customProperties = { - "number_of_files": str(get_file_count(delta_table)), "partition_columns": str(delta_table.metadata().partition_columns), "table_creation_time": str(delta_table.metadata().created_time), "id": str(delta_table.metadata().id), "version": str(delta_table.version()), "location": self.source_config.complete_path, } - if not self.source_config.require_files: - del customProperties["number_of_files"] # always 0 + if self.source_config.require_files: + customProperties["number_of_files"] = str(get_file_count(delta_table)) dataset_properties = DatasetPropertiesClass( description=delta_table.metadata().description, diff --git a/metadata-ingestion/tests/unit/test_mlflow_source.py b/metadata-ingestion/tests/unit/test_mlflow_source.py index ae5a42bad229d..d213dd92352e6 100644 --- a/metadata-ingestion/tests/unit/test_mlflow_source.py +++ b/metadata-ingestion/tests/unit/test_mlflow_source.py @@ -1,6 +1,6 @@ import datetime from pathlib import Path -from typing import Any, TypeVar, Union +from typing import Any, Union import pytest from mlflow import MlflowClient @@ -11,8 +11,6 @@ from datahub.ingestion.api.common import 
PipelineContext from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource -T = TypeVar("T") - @pytest.fixture def tracking_uri(tmp_path: Path) -> str: @@ -46,7 +44,7 @@ def model_version( ) -def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]: +def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[str]: dummy_pages = dict( page_1=PagedList(items=["a", "b"], token="page_2"), page_2=PagedList(items=["c", "d"], token="page_3"), From 62c39cdcd3292ed52c6056865a1edb8471139eea Mon Sep 17 00:00:00 2001 From: Semion Sidorenko Date: Mon, 14 Oct 2024 20:57:38 +0200 Subject: [PATCH 15/50] fix(ingest/superset): parse postgres platform correctly (#11540) --- metadata-ingestion/src/datahub/ingestion/source/superset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 858281f880359..4e40407fba908 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -243,6 +243,8 @@ def get_platform_from_database_id(self, database_id): return "athena" if platform_name == "clickhousedb": return "clickhouse" + if platform_name == "postgresql": + return "postgres" return platform_name @lru_cache(maxsize=None) From 09d70bc02c33da5c3f011cd4032fd06b4154e5d7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 14 Oct 2024 14:27:36 -0500 Subject: [PATCH 16/50] feat(openapi-v3): support async and createIfNotExists params on aspect (#11609) --- .../metadata/entity/EntityServiceImpl.java | 1 + .../controller/GenericEntitiesController.java | 37 +++++++++++++++---- .../v2/controller/EntityController.java | 27 +++++++++++++- .../openapi/v3/OpenAPIV3Generator.java | 22 +++++++++++ .../v3/controller/EntityController.java | 31 +++++++++++++++- 
.../metadata/entity/IngestResult.java | 2 + 6 files changed, 110 insertions(+), 10 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 34c98bba01af4..00feb547ca330 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1357,6 +1357,7 @@ private Stream ingestProposalSync( return IngestResult.builder() .urn(item.getUrn()) .request(item) + .result(result) .publishedMCL(result.getMclFuture() != null) .sqlCommitted(true) .isUpdate(result.getOldValue() != null) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 7e7929e7f27d3..7427f293c848f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -170,6 +170,9 @@ protected abstract E buildGenericEntity( @Nonnull UpdateAspectResult updateAspectResult, boolean withSystemMetadata); + protected abstract E buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata); + protected abstract AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) throws JsonProcessingException, InvalidUrnException; @@ -560,8 +563,11 @@ public ResponseEntity createAspect( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, @PathVariable("aspectName") String aspectName, + @RequestParam(value = "async", required = false, defaultValue = "false") Boolean async, 
@RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata, + @RequestParam(value = "createIfEntityNotExists", required = false, defaultValue = "false") + Boolean createIfEntityNotExists, @RequestParam(value = "createIfNotExists", required = false, defaultValue = "true") Boolean createIfNotExists, @RequestBody @Nonnull String jsonAspect) @@ -591,24 +597,38 @@ public ResponseEntity createAspect( opContext.getRetrieverContext().get().getAspectRetriever(), urn, aspectSpec, + createIfEntityNotExists, createIfNotExists, jsonAspect, authentication.getActor()); - List results = - entityService.ingestAspects( + Set results = + entityService.ingestProposal( opContext, AspectsBatchImpl.builder() .retrieverContext(opContext.getRetrieverContext().get()) .items(List.of(upsert)) .build(), - true, - true); + async); - return ResponseEntity.of( - results.stream() - .findFirst() - .map(result -> buildGenericEntity(aspectName, result, withSystemMetadata))); + if (!async) { + return ResponseEntity.of( + results.stream() + .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + .findFirst() + .map( + result -> + buildGenericEntity(aspectName, result.getResult(), withSystemMetadata))); + } else { + return results.stream() + .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + .map( + result -> + ResponseEntity.accepted() + .body(buildGenericEntity(aspectName, result, withSystemMetadata))) + .findFirst() + .orElse(ResponseEntity.accepted().build()); + } } @Tag(name = "Generic Aspects") @@ -789,6 +809,7 @@ protected abstract ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 28537b849b68a..7bec052a9fd5d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -232,6 +232,20 @@ protected GenericEntityV2 buildGenericEntity( withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null))); } + @Override + protected GenericEntityV2 buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata) { + return GenericEntityV2.builder() + .urn(ingestResult.getUrn().toString()) + .build( + objectMapper, + Map.of( + aspectName, + Pair.of( + ingestResult.getRequest().getRecordTemplate(), + withSystemMetadata ? ingestResult.getRequest().getSystemMetadata() : null))); + } + private List toRecordTemplates( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, @@ -278,14 +292,25 @@ protected ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) throws URISyntaxException { + + final ChangeType changeType; + if (Boolean.TRUE.equals(createIfEntityNotExists)) { + changeType = ChangeType.CREATE_ENTITY; + } else if (Boolean.TRUE.equals(createIfNotExists)) { + changeType = ChangeType.CREATE; + } else { + changeType = ChangeType.UPSERT; + } + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? 
ChangeType.CREATE : ChangeType.UPSERT) + .changeType(changeType) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) .recordTemplate( GenericRecordUtils.deserializeAspect( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index e33ad24a6c248..d179ea8f3a068 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -1100,6 +1100,28 @@ private static PathItem buildSingleEntityAspectPath( new Operation() .summary(String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) .tags(tags) + .parameters( + List.of( + new Parameter() + .in(NAME_QUERY) + .name("async") + .description("Use async ingestion for high throughput.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name(NAME_SYSTEM_METADATA) + .description("Include systemMetadata with response.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name("createIfEntityNotExists") + .description("Only create the aspect if the Entity doesn't exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name("createIfNotExists") + .description("Only create the aspect if the Aspect doesn't exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(true)))) .requestBody(requestBody) .responses(new ApiResponses().addApiResponse("201", successPostResponse)); // Patch Operation diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index c7d8c72f8a1c3..55cf310be3438 
100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -328,6 +328,24 @@ protected GenericEntityV3 buildGenericEntity( .build())); } + @Override + protected GenericEntityV3 buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata) { + return GenericEntityV3.builder() + .build( + objectMapper, + ingestResult.getUrn(), + Map.of( + aspectName, + AspectItem.builder() + .aspect(ingestResult.getRequest().getRecordTemplate()) + .systemMetadata( + withSystemMetadata ? ingestResult.getRequest().getSystemMetadata() : null) + .auditStamp( + withSystemMetadata ? ingestResult.getRequest().getAuditStamp() : null) + .build())); + } + private List toRecordTemplates( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, @@ -472,16 +490,27 @@ protected ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) throws JsonProcessingException { JsonNode jsonNode = objectMapper.readTree(jsonAspect); String aspectJson = jsonNode.get("value").toString(); + + final ChangeType changeType; + if (Boolean.TRUE.equals(createIfEntityNotExists)) { + changeType = ChangeType.CREATE_ENTITY; + } else if (Boolean.TRUE.equals(createIfNotExists)) { + changeType = ChangeType.CREATE; + } else { + changeType = ChangeType.UPSERT; + } + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? 
ChangeType.CREATE : ChangeType.UPSERT) + .changeType(changeType) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) .recordTemplate( GenericRecordUtils.deserializeAspect( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java index d3f8b507bb14a..f8b76db110c08 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.batch.BatchItem; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Value; @@ -10,6 +11,7 @@ public class IngestResult { Urn urn; BatchItem request; + @Nullable UpdateAspectResult result; boolean publishedMCL; boolean processedMCL; boolean publishedMCP; From 3078c5a4c982d57ffe308c35acb1d6d03e8c0ad1 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 14 Oct 2024 13:13:17 -0700 Subject: [PATCH 17/50] fix(ingest/preset): Add skip_on_failure to root_validator decorator (#11615) --- metadata-ingestion/src/datahub/ingestion/source/preset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/preset.py b/metadata-ingestion/src/datahub/ingestion/source/preset.py index e51520898103d..6f53223e000f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/preset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/preset.py @@ -56,7 +56,7 @@ class PresetConfig(SupersetConfig): def remove_trailing_slash(cls, v): return config_clean.remove_trailing_slashes(v) - @root_validator + @root_validator(skip_on_failure=True) def default_display_uri_to_connect_uri(cls, values): base = values.get("display_uri") if base is None: From e0939c7ecd6e9b68a8e1def929dcb50dde1d46df Mon Sep 17 00:00:00 2001 From: Harshal Sheth 
Date: Mon, 14 Oct 2024 17:14:55 -0700 Subject: [PATCH 18/50] docs(apis): update OpenAPI disclaimer (#11617) --- docs/api/datahub-apis.md | 132 +++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index 6bb793a59a86e..c46aacde3a0cb 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -2,18 +2,16 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list of APIs and their pros and cons to help you choose the right one for your use case. -| API | Definition | Pros | Cons | -|--------------------------------------------------------------------------------|------------------------------------|------------------------------------------|-------------------------------------------------------------| -| **[Python SDK](/metadata-ingestion/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | -| **[Java SDK](/metadata-integration/java/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | -| **[GraphQL API](docs/api/graphql/getting-started.md)** | GraphQL interface | Intuitive; mirrors UI capabilities | Less flexible than SDKs; requires knowledge of GraphQL syntax | -| **[OpenAPI](docs/api/openapi/openapi-usage-guide.md)**
(Not Recommended) | Lower-level API for advanced users | | Generally not recommended for typical use cases | +| API | Definition | Pros | Cons | +| ---------------------------------------------------------- | ---------------------------------- | ---------------------------------------- | ----------------------------------------------------------------------- | +| **[Python SDK](/metadata-ingestion/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | +| **[Java SDK](/metadata-integration/java/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | +| **[GraphQL API](docs/api/graphql/getting-started.md)** | GraphQL interface | Intuitive; mirrors UI capabilities | Less flexible than SDKs; requires knowledge of GraphQL syntax | +| **[OpenAPI](docs/api/openapi/openapi-usage-guide.md)** | Lower-level API for advanced users | Most powerful and flexible | Can be hard to use for straightforward use cases; no corresponding SDKs | In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. - - ## Python and Java SDK We offer an SDK for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are the examples of how to use the SDKs: @@ -23,22 +21,22 @@ We offer an SDK for both Python and Java that provide full functionality when it - Creating custom metadata entities Learn more about the SDKs: + - **[Python SDK →](/metadata-ingestion/as-a-library.md)** - **[Java SDK →](/metadata-integration/java/as-a-library.md)** - ## GraphQL API The `graphql` API serves as the primary public API for the platform. 
It can be used to fetch and update metadata programmatically in the language of your choice. Intended as a higher-level API that simplifies the most common operations. We recommend using the GraphQL API if you're getting started with DataHub since it's more user-friendly and straightforward. Here are some examples of how to use the GraphQL API: + - Search for datasets with conditions - Update a certain field of a dataset Learn more about the GraphQL API: -- **[GraphQL API →](docs/api/graphql/getting-started.md)** - +- **[GraphQL API →](docs/api/graphql/getting-started.md)** ## DataHub API Comparison @@ -47,59 +45,59 @@ Here's an overview of what each API can do. > Last Updated : Feb 16 2024 -| Feature | GraphQL | Python SDK | OpenAPI | -|------------------------------------|------------------------------------------------------------------------------|------------------------------------------------------------------------------|---------| -| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | -| Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Search a Dataset | ✅ [[Guide]](/docs/how/search.md#graphql) | ✅ | ✅ | -| Read a Dataset Deprecation | ✅ | ✅ | ✅ | -| Read Dataset Entities (V2) | ✅ | ✅ | ✅ | -| Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | -| Read a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ | -| Add Tags to a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ | -| Add Tags to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅
[[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ | -| Remove Tags from a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#remove-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags#remove-tags) | ✅ | -| Create Glossary Terms | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ | -| Read Terms from a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ | -| Add Terms to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ | -| Add Terms to a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ | -| Create Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ | -| Read Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ | -| Add Domains to a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ | -| Remove Domains from a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ | -| Create / Upsert Users | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ | -| Create / Upsert Group | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ | -| Read Owners of a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ | -| Add Owner to a Dataset | ✅ 
[[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners#remove-owners) | ✅ | -| Remove Owner from a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Add Lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-lineage) | ✅ | -| Add Column Level (Fine Grained) Lineage | 🚫 | ✅ [[Guide]](docs/api/tutorials/lineage.md#add-column-level-lineage) | ✅ | -| Add Documentation (Description) to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | -| Add Documentation (Description) to a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | -| Add / Remove / Replace Custom Properties on a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | -| Add ML Feature to ML Feature Table | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlfeaturetable) | ✅ | -| Add ML Feature to MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlmodel) | ✅ | -| Add ML Group to MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlgroup-to-mlfeaturetable) | ✅ | -| Create MLFeature | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeature) | ✅ | -| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | -| Create MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodel) | ✅ | -| Create MLModelGroup | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodelgroup) | ✅ | -| Create MLPrimaryKey | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlprimarykey) | ✅ | -| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable)| ✅ | -| Read MLFeature | ✅ 
[[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ | -| Read MLFeatureTable | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ | -| Read MLModel | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ | -| Read MLModelGroup | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ | -| Read MLPrimaryKey | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ | -| Create Data Product | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_dataproduct.py) | ✅ | -| Create Lineage Between Chart and Dashboard | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_chart_dashboard.py) | ✅ | -| Create Lineage Between Dataset and Chart | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_chart.py) | ✅ | -| Create Lineage Between Dataset and DataJob | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) | ✅ | -| Create Finegrained Lineage as DataJob for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py) | ✅ | -| Create Finegrained Lineage for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) | ✅ | -| Create Dataset Lineage with Kafka | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_kafka.py) | ✅ | -| Create 
Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | -| Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | -| Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | -| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | +| Feature | GraphQL | Python SDK | OpenAPI | +| -------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | +| Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Search a Dataset | ✅ 
[[Guide]](/docs/how/search.md#graphql) | ✅ | ✅ | +| Read a Dataset Deprecation | ✅ | ✅ | ✅ | +| Read Dataset Entities (V2) | ✅ | ✅ | ✅ | +| Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | +| Read a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ | +| Add Tags to a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ | +| Add Tags to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ | +| Remove Tags from a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#remove-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags#remove-tags) | ✅ | +| Create Glossary Terms | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ | +| Read Terms from a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ | +| Add Terms to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ | +| Add Terms to a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ | +| Create Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ | +| Read Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ | +| Add Domains to a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ | 
+| Remove Domains from a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ | +| Create / Upsert Users | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ | +| Create / Upsert Group | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ | +| Read Owners of a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ | +| Add Owner to a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ | +| Remove Owner from a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ | +| Add Lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-lineage) | ✅ | +| Add Column Level (Fine Grained) Lineage | 🚫 | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-column-level-lineage) | ✅ | +| Add Documentation (Description) to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | +| Add Documentation (Description) to a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | +| Add / Remove / Replace Custom Properties on a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | +| Add ML Feature to ML Feature Table | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlfeaturetable) | ✅ | +| Add ML Feature to MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlmodel) | ✅ | +| Add ML Group to MLFeatureTable | 🚫 | ✅
[[Guide]](/docs/api/tutorials/ml.md#add-mlgroup-to-mlfeaturetable) | ✅ | +| Create MLFeature | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeature) | ✅ | +| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | +| Create MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodel) | ✅ | +| Create MLModelGroup | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodelgroup) | ✅ | +| Create MLPrimaryKey | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlprimarykey) | ✅ | +| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | +| Read MLFeature | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ | +| Read MLFeatureTable | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ | +| Read MLModel | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ | +| Read MLModelGroup | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ | +| Read MLPrimaryKey | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ | +| Create Data Product | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_dataproduct.py) | ✅ | +| Create Lineage Between Chart and Dashboard | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_chart_dashboard.py) | ✅ | +| Create Lineage Between Dataset and Chart | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_chart.py) | ✅ | +| Create Lineage Between Dataset and DataJob | 🚫 | ✅ 
[[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) | ✅ | +| Create Finegrained Lineage as DataJob for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py) | ✅ | +| Create Finegrained Lineage for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) | ✅ | +| Create Dataset Lineage with Kafka | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_kafka.py) | ✅ | +| Create Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | +| Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | +| Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | +| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | From 0d06a613655fbec2311161c89cb4f333e0d74c31 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 14 Oct 2024 20:38:23 -0700 Subject: [PATCH 19/50] docs: add docs on term suggestion (#11606) --- docs-website/sidebars.js | 12 ++++ docs/automations/ai-docs.md | 36 
++++++++++ docs/automations/ai-term-suggestion.md | 72 +++++++++++++++++++ docs/automations/snowflake-tag-propagation.md | 33 +++++---- 4 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 docs/automations/ai-docs.md create mode 100644 docs/automations/ai-term-suggestion.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 06838f2e686bc..8f3da2050a9b7 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -113,6 +113,18 @@ module.exports = { id: "docs/automations/snowflake-tag-propagation", className: "saasOnly", }, + { + label: "AI Classification", + type: "doc", + id: "docs/automations/ai-term-suggestion", + className: "saasOnly", + }, + { + label: "AI Documentation", + type: "doc", + id: "docs/automations/ai-docs", + className: "saasOnly", + }, ], }, { diff --git a/docs/automations/ai-docs.md b/docs/automations/ai-docs.md new file mode 100644 index 0000000000000..bbec33f3bcae6 --- /dev/null +++ b/docs/automations/ai-docs.md @@ -0,0 +1,36 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Documentation + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. + +::: + +With AI-powered documentation, you can automatically generate documentation for tables and columns. + +

+ +

+ +## Configuring + +No configuration is required - just hit "Generate" on any table or column in the UI. + +## How it works + +Generating good documentation requires a holistic understanding of the data. Information we take into account includes, but is not limited to: + +- Dataset name and any existing documentation +- Column name, type, description, and sample values +- Lineage relationships to upstream and downstream assets +- Metadata about other related assets + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- This feature is powered by an LLM, which can produce inaccurate results. While we've taken steps to reduce the likelihood of hallucinations, they can still occur. diff --git a/docs/automations/ai-term-suggestion.md b/docs/automations/ai-term-suggestion.md new file mode 100644 index 0000000000000..27d1716cfc372 --- /dev/null +++ b/docs/automations/ai-term-suggestion.md @@ -0,0 +1,72 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Glossary Term Suggestions + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. + +::: + +The AI Glossary Term Suggestion automation uses LLMs to suggest [Glossary Terms](../glossary/business-glossary.md) for tables and columns in your data. + +This is useful for improving coverage of glossary terms across your organization, which is important for compliance and governance efforts. + +This automation: + +- Automatically suggests glossary terms for tables and columns. +- Goes beyond a predefined set of terms and works with your business glossary. +- Generates [proposals](../managed-datahub/approval-workflows.md) for owners to review, or can automatically add terms to tables/columns.
+- Automatically adjusts to human-provided feedback and curation (coming soon). + +## Prerequisites + +- A business glossary with terms defined. Additional metadata, like documentation and existing term assignments, will improve the accuracy of our suggestions. + +## Configuring + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

+ +

+ +2. **Create the Automation**: Click on 'Create' and select 'AI Glossary Term Suggestions'. + +

+ +

+ +3. **Configure the Automation**: Fill in the required fields to configure the automation. + The main fields to configure are (1) what terms to use for suggestions and (2) what entities to generate suggestions for. + +

+ +

+ +4. Once it's enabled, that's it! You'll start to see terms show up in the UI, either on assets or in the proposals page. + +

+ +

+ +## How it works + +The automation will scan through all the datasets matched by the configured filters. For each one, it will generate suggestions. +If new entities are added that match the configured filters, those will also be classified within 24 hours. + +We take into account the following metadata when generating suggestions: + +- Dataset name and description +- Column name, type, description, and sample values +- Glossary term name, documentation, and hierarchy +- Feedback loop: existing assignments and accepted/rejected proposals (coming soon) + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- A single configured automation can classify at most 10k entities. +- We cannot do partial reclassification. If you add a new column to an existing table, we won't regenerate suggestions for that table. diff --git a/docs/automations/snowflake-tag-propagation.md b/docs/automations/snowflake-tag-propagation.md index bdc80376dfb48..c708e40cbdd81 100644 --- a/docs/automations/snowflake-tag-propagation.md +++ b/docs/automations/snowflake-tag-propagation.md @@ -1,4 +1,3 @@ - import FeatureAvailability from '@site/src/components/FeatureAvailability'; # Snowflake Tag Propagation Automation @@ -20,22 +19,22 @@ both columns and tables back to Snowflake. This automation is available in DataH 1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. -

- +

+

2. **Create An Automation**: Click on 'Create' and select 'Snowflake Tag Propagation'. -

- +

+

-3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. -Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically -propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation +3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. + Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically + propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation -

- +

+

## Propagating for Existing Assets @@ -46,13 +45,13 @@ Note that it may take some time to complete the initial back-filling process, de To do so, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Initialize".

- +

This one-time step will kick off the back-filling process for existing descriptions. If you only want to begin propagating @@ -68,21 +67,21 @@ that you no longer want propagated descriptions to be visible. To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Rollback".

- +

This one-time step will remove all propagated tags and glossary terms from Snowflake. To simply stop propagating new tags, you can disable the automation. ## Viewing Propagated Tags -You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. +You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. -

- +

+

From be1b8806a772e459016a225f3ace103bb692b070 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 15 Oct 2024 01:39:41 -0700 Subject: [PATCH 20/50] fix(airflow): fix lint related to dag_run field (#11616) --- .../src/datahub_airflow_plugin/datahub_listener.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index d1c7e996dd03d..c1d5b306f187d 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -383,13 +383,11 @@ def on_task_instance_running( return logger.debug( - f"DataHub listener got notification about task instance start for {task_instance.task_id} of dag {task_instance.dag_run.dag_id}" + f"DataHub listener got notification about task instance start for {task_instance.task_id} of dag {task_instance.dag_id}" ) - if not self.config.dag_filter_pattern.allowed(task_instance.dag_run.dag_id): - logger.debug( - f"DAG {task_instance.dag_run.dag_id} is not allowed by the pattern" - ) + if not self.config.dag_filter_pattern.allowed(task_instance.dag_id): + logger.debug(f"DAG {task_instance.dag_id} is not allowed by the pattern") return if self.config.render_templates: From 1eec2c42383b1b808ea157f9e2acebe9f9ec4fbe Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 15 Oct 2024 14:53:45 +0530 Subject: [PATCH 21/50] fix(ingest): drop empty fields (#11613) --- .../src/datahub/ingestion/api/source.py | 2 + .../datahub/ingestion/api/source_helpers.py | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 85ae17ddf6529..586b1c610dc75 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -37,6 +37,7 @@ from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, auto_fix_duplicate_schema_field_paths, + auto_fix_empty_field_paths, auto_lowercase_urns, auto_materialize_referenced_tags_terms, auto_status_aspect, @@ -444,6 +445,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: partial( auto_fix_duplicate_schema_field_paths, platform=self._infer_platform() ), + partial(auto_fix_empty_field_paths, platform=self._infer_platform()), browse_path_processor, partial(auto_workunit_reporter, self.get_report()), auto_patch_last_modified, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 372aef707f232..748d8a8e52a79 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -394,6 +394,50 @@ def auto_fix_duplicate_schema_field_paths( ) +def auto_fix_empty_field_paths( + stream: Iterable[MetadataWorkUnit], + *, + platform: Optional[str] = None, +) -> Iterable[MetadataWorkUnit]: + """Count schema metadata aspects with empty field paths and emit telemetry.""" + + total_schema_aspects = 0 + schemas_with_empty_fields = 0 + empty_field_paths = 0 + + for wu in stream: + schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass) + if schema_metadata: + total_schema_aspects += 1 + + updated_fields: List[SchemaFieldClass] = [] + for field in schema_metadata.fields: + if field.fieldPath: + updated_fields.append(field) + else: + empty_field_paths += 1 + + if empty_field_paths > 0: + logger.info( + f"Fixing empty field paths in schema aspect for {wu.get_urn()} by dropping empty fields" + ) + schema_metadata.fields = updated_fields + schemas_with_empty_fields += 1 + + yield wu + + if schemas_with_empty_fields > 0: + properties = 
{ + "platform": platform, + "total_schema_aspects": total_schema_aspects, + "schemas_with_empty_fields": schemas_with_empty_fields, + "empty_field_paths": empty_field_paths, + } + telemetry.telemetry_instance.ping( + "ingestion_empty_schema_field_paths", properties + ) + + def auto_empty_dataset_usage_statistics( stream: Iterable[MetadataWorkUnit], *, From d8ede691f095aa7dc27f90692a067555584bd33e Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:34:30 -0500 Subject: [PATCH 22/50] fix(ci): ensure py 3.10 (#11626) --- .github/workflows/docker-unified.yml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index ef5770ccb167a..a1ae7cee1736f 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -760,14 +760,18 @@ jobs: needs: [setup, datahub_ingestion_base_slim_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: + - name: Check out the repo + uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - name: Check out the repo - uses: acryldata/sane-checkout-action@v3 - name: Build codegen if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }} run: ./gradlew :metadata-ingestion:codegen @@ -852,14 +856,18 @@ jobs: needs: [setup, datahub_ingestion_base_full_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: + - name: Check out the repo + uses: 
acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - name: Check out the repo - uses: acryldata/sane-checkout-action@v3 - name: Build codegen if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen @@ -983,16 +991,16 @@ jobs: run: df -h . && docker images - name: Check out the repo uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "pip" - name: Login to DockerHub uses: docker/login-action@v3 if: ${{ needs.setup.outputs.docker-login == 'true' }} From c834cdb0ab9f63dedbc8b69ccc729f776d160337 Mon Sep 17 00:00:00 2001 From: Jay <159848059+jayacryl@users.noreply.github.com> Date: Tue, 15 Oct 2024 10:55:23 -0400 Subject: [PATCH 23/50] feat(docs-site) brought back announcement banner (#11618) --- docs-website/docusaurus.config.js | 8 ++++++++ docs-website/src/pages/index.js | 2 +- docs-website/src/styles/global.scss | 5 +++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index b0cf5bfa35eca..b016f9518ea6c 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -65,6 +65,14 @@ module.exports = { // isCloseable: false, // }, // }), + announcementBar: { + id: "announcement-2", + content: + '
NEW

Join us at Metadata & AI Summit, Oct. 29 & 30!

Register →
', + backgroundColor: "#111", + textColor: "#ffffff", + isCloseable: false, + }, colorMode: { // Only support light mode. defaultMode: 'light', diff --git a/docs-website/src/pages/index.js b/docs-website/src/pages/index.js index 1c36b81a2da95..e1c94780715d3 100644 --- a/docs-website/src/pages/index.js +++ b/docs-website/src/pages/index.js @@ -44,7 +44,7 @@ function Home() { return !siteConfig.customFields.isSaas ? ( {isTourModalVisible ? (
diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index e256c752b4a0b..96ca07d45d0c2 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -31,7 +31,7 @@ --ifm-navbar-item-padding-horizontal: 1rem; /* Announcement Bar */ - --docusaurus-announcement-bar-height: 60px !important; + --docusaurus-announcement-bar-height: 48px !important; /* Rule */ --ifm-hr-border-width: 1px 0 0 0; @@ -141,8 +141,9 @@ div[class^="announcementBar"] { } a { - color: var(--ifm-button-color); + color: #EFB300; text-decoration: none; + font-size: 1rem } } } From b4b91421e817ccbb4ed0012b55b0793bdbed7b2c Mon Sep 17 00:00:00 2001 From: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com> Date: Tue, 15 Oct 2024 20:46:56 +0530 Subject: [PATCH 24/50] fix(search): make graphql query autoCompleteForMultiple to show exact matches first (#11586) --- .../elasticsearch/query/ESSearchDAO.java | 2 +- .../request/AutocompleteRequestHandler.java | 95 ++++++++++------- .../fixtures/SampleDataFixtureTestBase.java | 22 +++- .../AutocompleteRequestHandlerTest.java | 98 +++++++++++++----- .../sample_data/containerindex_v2.json.gz | Bin 295 -> 335 bytes 5 files changed, 154 insertions(+), 63 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index f09a81c0c8b89..2d7db075e676f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -370,7 +370,7 @@ public AutoCompleteResult autoComplete( IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = AutocompleteRequestHandler.getBuilder( - entitySpec, customSearchConfiguration, queryFilterRewriteChain); + 
entitySpec, customSearchConfiguration, queryFilterRewriteChain, searchConfiguration); SearchRequest req = builder.getSearchRequest( opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 294efb069a904..45359285b4a04 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; import static com.linkedin.metadata.search.utils.ESAccessControlUtil.restrictUrn; import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters; @@ -8,6 +7,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; @@ -35,6 +35,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.index.query.*; @@ -46,7 +47,7 @@ @Slf4j public class AutocompleteRequestHandler { - private final List _defaultAutocompleteFields; + private final List _defaultAutocompleteFields; private final Map> searchableFieldTypes; private static final Map @@ 
-56,11 +57,13 @@ public class AutocompleteRequestHandler { private final EntitySpec entitySpec; private final QueryFilterRewriteChain queryFilterRewriteChain; + private final SearchConfiguration searchConfiguration; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { this.entitySpec = entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -69,8 +72,12 @@ public AutocompleteRequestHandler( fieldSpecs.stream() .map(SearchableFieldSpec::getSearchableAnnotation) .filter(SearchableAnnotation::isEnableAutocomplete) - .map(SearchableAnnotation::getFieldName), - Stream.of("urn")) + .map( + searchableAnnotation -> + Pair.of( + searchableAnnotation.getFieldName(), + Double.toString(searchableAnnotation.getBoostScore()))), + Stream.of(Pair.of("urn", "1.0"))) .collect(Collectors.toList()); searchableFieldTypes = fieldSpecs.stream() @@ -87,17 +94,22 @@ public AutocompleteRequestHandler( return set1; })); this.queryFilterRewriteChain = queryFilterRewriteChain; + this.searchConfiguration = searchConfiguration; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( entitySpec, k -> new AutocompleteRequestHandler( - entitySpec, customSearchConfiguration, queryFilterRewriteChain)); + entitySpec, + customSearchConfiguration, + queryFilterRewriteChain, + searchConfiguration)); } public 
SearchRequest getSearchRequest( @@ -169,7 +181,7 @@ private BoolQueryBuilder getQuery( public BoolQueryBuilder getQuery( @Nonnull ObjectMapper objectMapper, @Nullable AutocompleteConfiguration customAutocompleteConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = @@ -189,7 +201,7 @@ public BoolQueryBuilder getQuery( private Optional getAutocompleteQuery( @Nullable AutocompleteConfiguration customConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { Optional result = Optional.empty(); @@ -200,33 +212,39 @@ private Optional getAutocompleteQuery( return result; } - private static BoolQueryBuilder defaultQuery( - List autocompleteFields, @Nonnull String query) { + private BoolQueryBuilder defaultQuery(List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); // Search for exact matches with higher boost and ngram matches - MultiMatchQueryBuilder autocompleteQueryBuilder = + MultiMatchQueryBuilder multiMatchQueryBuilder = QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX); - final float urnBoost = - Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); autocompleteFields.forEach( - fieldName -> { - if ("urn".equals(fieldName)) { - autocompleteQueryBuilder.field(fieldName + ".ngram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", urnBoost); - } else { - autocompleteQueryBuilder.field(fieldName + ".ngram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram"); + pair -> { + final String fieldName = (String) pair.getLeft(); + final float boostScore = 
Float.parseFloat((String) pair.getRight()); + multiMatchQueryBuilder.field(fieldName + ".ngram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._2gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._3gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._4gram"); + multiMatchQueryBuilder.field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + multiMatchQueryBuilder.field(fieldName + ".ngram", boostScore); + multiMatchQueryBuilder.field( + fieldName + ".ngram._2gram", + boostScore * (searchConfiguration.getWordGram().getTwoGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._3gram", + boostScore * (searchConfiguration.getWordGram().getThreeGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._4gram", + boostScore * (searchConfiguration.getWordGram().getFourGramFactor())); + finalQuery.should( + QueryBuilders.matchQuery(fieldName + ".keyword", query).boost(boostScore)); } - autocompleteQueryBuilder.field(fieldName + ".delimited"); finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query)); }); - finalQuery.should(autocompleteQueryBuilder); + finalQuery.should(multiMatchQueryBuilder); return finalQuery; } @@ -241,12 +259,17 @@ private HighlightBuilder getHighlights(@Nullable String field) { // Check for each field name and any subfields getAutocompleteFields(field) .forEach( - fieldName -> - highlightBuilder - .field(fieldName) - .field(fieldName + ".*") - .field(fieldName + ".ngram") - .field(fieldName + ".delimited")); + pair -> { + final String fieldName = (String) pair.getLeft(); + highlightBuilder + .field(fieldName) + .field(fieldName + ".*") + .field(fieldName + ".ngram") + .field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + highlightBuilder.field(fieldName + ".keyword"); + } + }); // set field match req false for ngram highlightBuilder.fields().stream() @@ -256,9 +279,9 @@ private HighlightBuilder getHighlights(@Nullable String 
field) { return highlightBuilder; } - private List getAutocompleteFields(@Nullable String field) { - if (field != null && !field.isEmpty()) { - return ImmutableList.of(field); + private List getAutocompleteFields(@Nullable String field) { + if (field != null && !field.isEmpty() && !field.equalsIgnoreCase("urn")) { + return ImmutableList.of(Pair.of(field, "10.0")); } return _defaultAutocompleteFields; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index bc3c892e07b1b..504eb5f5fc13d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -283,7 +283,7 @@ public void testFixtureInitialization() { Map.of( "dataset", 13, "chart", 0, - "container", 1, + "container", 2, "dashboard", 0, "tag", 0, "mlmodel", 0); @@ -903,6 +903,26 @@ public void testContainerAutoComplete() { }); } + @Test + public void testContainerAutoComplete_with_exactMatch_onTop() { + List.of("container") + .forEach( + query -> { + try { + AutoCompleteResults result = + autocomplete( + getOperationContext(), new ContainerType(getEntityClient()), query); + assertTrue( + result.getSuggestions().get(0).equals("container"), + String.format( + "Expected query:`%s` on top of suggestions, found %s", + query, result.getSuggestions().get(0))); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + @Test public void testGroupAutoComplete() { List.of("T", "Te", "Tes", "Test ", "Test G", "Test Gro", "Test Group ") diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 572d79ebf2f0c..c5205906e9d37 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,6 +5,10 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -32,14 +36,44 @@ import org.testng.annotations.Test; public class AutocompleteRequestHandlerTest { - private AutocompleteRequestHandler handler = - AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - CustomSearchConfiguration.builder().build(), - QueryFilterRewriteChain.EMPTY); + private static SearchConfiguration testQueryConfig; + private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + static { + testQueryConfig = new SearchConfiguration(); + testQueryConfig.setMaxTermBucketSize(20); + + ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration(); + exactMatchConfiguration.setExclusive(false); + exactMatchConfiguration.setExactFactor(10.0f); + exactMatchConfiguration.setWithPrefix(true); + exactMatchConfiguration.setPrefixFactor(6.0f); + exactMatchConfiguration.setCaseSensitivityFactor(0.7f); + exactMatchConfiguration.setEnableStructured(true); + + WordGramConfiguration wordGramConfiguration = new WordGramConfiguration(); + wordGramConfiguration.setTwoGramFactor(1.2f); + 
wordGramConfiguration.setThreeGramFactor(1.5f); + wordGramConfiguration.setFourGramFactor(1.8f); + + PartialConfiguration partialConfiguration = new PartialConfiguration(); + partialConfiguration.setFactor(0.4f); + partialConfiguration.setUrnFactor(0.7f); + + testQueryConfig.setExactMatch(exactMatchConfiguration); + testQueryConfig.setWordGram(wordGramConfiguration); + testQueryConfig.setPartial(partialConfiguration); + + handler = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY, + testQueryConfig); + } + private static final QueryConfiguration TEST_QUERY_CONFIG = QueryConfiguration.builder() .queryRegex(".*") @@ -88,9 +122,12 @@ public void testDefaultAutocompleteRequest() { BoolQueryBuilder wrapper = (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 3); + assertEquals(query.should().size(), 4); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("keyPart1.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(3); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("keyPart1.ngram")); assertTrue(queryFields.containsKey("keyPart1.ngram._2gram")); @@ -99,7 +136,7 @@ public void testDefaultAutocompleteRequest() { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("keyPart1.delimited", prefixQuery.fieldName()); assertEquals(wrapper.mustNot().size(), 1); @@ -108,15 +145,16 @@ public void 
testDefaultAutocompleteRequest() { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 8); + assertEquals(highlightedFields.size(), 9); assertEquals(highlightedFields.get(0).name(), "keyPart1"); assertEquals(highlightedFields.get(1).name(), "keyPart1.*"); assertEquals(highlightedFields.get(2).name(), "keyPart1.ngram"); assertEquals(highlightedFields.get(3).name(), "keyPart1.delimited"); - assertEquals(highlightedFields.get(4).name(), "urn"); - assertEquals(highlightedFields.get(5).name(), "urn.*"); - assertEquals(highlightedFields.get(6).name(), "urn.ngram"); - assertEquals(highlightedFields.get(7).name(), "urn.delimited"); + assertEquals(highlightedFields.get(4).name(), "keyPart1.keyword"); + assertEquals(highlightedFields.get(5).name(), "urn"); + assertEquals(highlightedFields.get(6).name(), "urn.*"); + assertEquals(highlightedFields.get(7).name(), "urn.ngram"); + assertEquals(highlightedFields.get(8).name(), "urn.delimited"); } @Test @@ -130,9 +168,12 @@ public void testAutocompleteRequestWithField() { (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); assertEquals(wrapper.should().size(), 1); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 2); + assertEquals(query.should().size(), 3); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(1); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("field.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("field.ngram")); assertTrue(queryFields.containsKey("field.ngram._2gram")); @@ -141,7 +182,7 @@ public void 
testAutocompleteRequestWithField() { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("field.delimited", prefixQuery.fieldName()); MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0); @@ -149,11 +190,12 @@ public void testAutocompleteRequestWithField() { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 4); + assertEquals(highlightedFields.size(), 5); assertEquals(highlightedFields.get(0).name(), "field"); assertEquals(highlightedFields.get(1).name(), "field.*"); assertEquals(highlightedFields.get(2).name(), "field.ngram"); assertEquals(highlightedFields.get(3).name(), "field.delimited"); + assertEquals(highlightedFields.get(4).name(), "field.keyword"); } @Test @@ -174,7 +216,8 @@ public void testCustomConfigWithDefault() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -200,7 +243,8 @@ public void testCustomConfigWithDefault() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -215,7 +259,7 @@ public void testCustomConfigWithDefault() { BoolQueryBuilder defaultQuery = (BoolQueryBuilder) shouldQueries.stream().filter(qb -> qb instanceof BoolQueryBuilder).findFirst().get(); - assertEquals(defaultQuery.should().size(), 3); + assertEquals(defaultQuery.should().size(), 4); // Custom customQuery = @@ 
-243,7 +287,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -282,7 +327,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -345,7 +391,8 @@ public void testCustomConfigWithFunctionScores() { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -398,7 +445,8 @@ public void testCustomConfigWithFunctionScores() { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -411,7 +459,7 @@ public void testCustomConfigWithFunctionScores() { assertEquals(customQuery, QueryBuilders.matchAllQuery()); // standard query still present - assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 3); + assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 4); // custom functions included assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); diff --git a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz index 2fa49c810abfa1af8cef95ac79136377d4b1ce4c..bd36747255f8604c37bc50fceb13293218a46330 
100644 GIT binary patch literal 335 zcmV-V0kHlbiwFqPZ4PGu17mM)bYW?3WpZh5WMz0?b}}w%b8l_{?T|lf!!QuV_kIk` zHpGe(Qb9H^U0PGRl@QL-nHa1;mCh|8^t)Gf6K64$ln#Xigoekx_wIfN)_KE3|BJQ} z(i(!spcgz-IiKa2&DEm1U*&SK0tqT#u28B>0!;xL)=8Ry$9i!bH#ZG8EX zZ9eY~zo14K=nQSNq|lL-R=e~#@c$u>yT5#I{4>na(ck#jP_rqjLlL2NH=>sl17mM)bYW?3WpZh5WMz0?b}}w%b8l_{&5$u`!!Qtq_x=pc zF~o`!T!A+)U0PGRl@QL-1q&?6mCh|8^uJef9eXN;PKD!!$Gz|F-j$8sGLgFIwUEvd zXp3I(Tors Date: Wed, 16 Oct 2024 01:12:58 +0900 Subject: [PATCH 25/50] feat: add contributor pr open comment action (#11487) Co-authored-by: Harshal Sheth --- .../workflows/contributor-open-pr-comment.yml | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/contributor-open-pr-comment.yml diff --git a/.github/workflows/contributor-open-pr-comment.yml b/.github/workflows/contributor-open-pr-comment.yml new file mode 100644 index 0000000000000..b3da16ca994ba --- /dev/null +++ b/.github/workflows/contributor-open-pr-comment.yml @@ -0,0 +1,39 @@ +name: PR Comment + +on: + pull_request: + types: [opened] + +jobs: + post-pr-opened-comment: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Get and Format Username (PR only) + if: github.event_name == 'pull_request' + run: | + formatted_username=$(echo "${{ github.event.pull_request.user.login }}" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g') + echo "FORMATTED_USERNAME=$formatted_username" >> $GITHUB_ENV + + - name: Create Comment (PR only) + if: github.event_name == 'pull_request' + uses: actions/github-script@v6 + with: + script: | + if (context.payload.pull_request) { + const prUser = process.env.FORMATTED_USERNAME; + const url = `https://contributors.datahubproject.io/${prUser}`; + const body = `Hello @${prUser} :smile: \n\n Thank you so much for opening a pull request!\n\n![Image](https://contributors.datahubproject.io/api/og?userId=${{ github.event.pull_request.user.login }})\nYou can check out your contributor card and see all 
your past stats [here](${url})!`; + + // Create a comment on the PR + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body: body + }); + } else { + console.log('Not a pull request event.'); + } From fdae71d25f68842609bd7e2ec837744b56531029 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:44:55 -0500 Subject: [PATCH 26/50] docs(ingestion): add architecture diagrams (#11628) --- docs/advanced/mcp-mcl.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/advanced/mcp-mcl.md b/docs/advanced/mcp-mcl.md index 9efb9b794954d..333891ba1a95d 100644 --- a/docs/advanced/mcp-mcl.md +++ b/docs/advanced/mcp-mcl.md @@ -14,6 +14,18 @@ To mitigate these downsides, we are committed to providing cross-language client Ultimately, we intend to realize a state in which the Entities and Aspect schemas can be altered without requiring generated code and without maintaining a single mega-model schema (looking at you, Snapshot.pdl). The intention is that changes to the metadata model become even easier than they are today. +### Synchronous Ingestion Architecture + +

+ +

+ +### Asynchronous Ingestion Architecture + +

+ +

+ ## Modeling A Metadata Change Proposal is defined (in PDL) as follows From 5d3e464c21f384d5eb25a0291a77067a9605a9fc Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:35:41 -0500 Subject: [PATCH 27/50] feat(validations): Ingest and metadata schema validators (#11619) Co-authored-by: Pedro Silva --- docs/how/updating-datahub.md | 2 + .../aspect/validation/FieldPathValidator.java | 116 +++++++++ .../validators/FieldPathValidatorTest.java | 233 ++++++++++++++++++ .../java/com/linkedin/metadata/Constants.java | 7 + .../datahub/ingestion/run/pipeline_config.py | 17 +- .../datahub/testing/compare_metadata_json.py | 4 + .../ExecutionRequestResultValidator.java | 70 ++++++ .../ExecutionRequestResultValidatorTest.java | 166 +++++++++++++ .../src/main/resources/entity-registry.yml | 6 + .../SpringStandardPluginConfiguration.java | 46 ++++ 10 files changed, 663 insertions(+), 4 deletions(-) create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 00e020bd2a387..dbcc7da846703 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -24,6 +24,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #11484 - Metadata service authentication enabled by default - #11484 - Rest API authorization enabled by default - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. 
+- #11619 - schema field/column paths can no longer be empty strings +- #11619 - schema field/column paths can no longer be duplicated within the schema ### Potential Downtime diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java new file mode 100644 index 0000000000000..7c279254e1bc3 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java @@ -0,0 +1,116 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaMetadata; +import java.util.Collection; +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; + +/** + * 1. Validates the Schema Field Path specification, specifically that all field IDs must be unique + * across all fields within a schema. 2. Validates that the field path id is not empty. 
+ * + * @see Field + * Path V2 docs + */ +@Setter +@Getter +@Accessors(chain = true) +public class FieldPathValidator extends AspectPayloadValidator { + @Nonnull private AspectPluginConfig config; + + /** Prevent any MCP for SchemaMetadata where field ids are duplicated. */ + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + + mcpItems.forEach( + i -> { + if (i.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME)) { + processSchemaMetadataAspect(i, exceptions); + } else { + processEditableSchemaMetadataAspect(i, exceptions); + } + }); + + return exceptions.streamAllExceptions(); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + private static void processEditableSchemaMetadataAspect( + BatchItem i, ValidationExceptionCollection exceptions) { + final EditableSchemaMetadata schemaMetadata = i.getAspect(EditableSchemaMetadata.class); + final long uniquePaths = + validateAndCount( + i, + schemaMetadata.getEditableSchemaFieldInfo().stream() + .map(EditableSchemaFieldInfo::getFieldPath), + exceptions); + + if (uniquePaths != schemaMetadata.getEditableSchemaFieldInfo().size()) { + exceptions.addException( + i, + String.format( + "Cannot perform %s action on proposal. 
EditableSchemaMetadata aspect has duplicated field paths", + i.getChangeType())); + } + } + + private static void processSchemaMetadataAspect( + BatchItem i, ValidationExceptionCollection exceptions) { + final SchemaMetadata schemaMetadata = i.getAspect(SchemaMetadata.class); + final long uniquePaths = + validateAndCount( + i, schemaMetadata.getFields().stream().map(SchemaField::getFieldPath), exceptions); + + if (uniquePaths != schemaMetadata.getFields().size()) { + exceptions.addException( + i, + String.format( + "Cannot perform %s action on proposal. SchemaMetadata aspect has duplicated field paths", + i.getChangeType())); + } + } + + private static long validateAndCount( + BatchItem i, Stream fieldPaths, ValidationExceptionCollection exceptions) { + return fieldPaths + .distinct() + // inspect the stream of fieldPath validation errors since we're already iterating + .peek( + fieldPath -> + validateFieldPath(fieldPath) + .ifPresent(message -> exceptions.addException(i, message))) + .count(); + } + + private static Optional validateFieldPath(String fieldPath) { + if (fieldPath == null || fieldPath.isEmpty()) { + return Optional.of("SchemaMetadata aspect has empty field path."); + } + return Optional.empty(); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java new file mode 100644 index 0000000000000..bd5912764edce --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java @@ -0,0 +1,233 @@ +package com.linkedin.metadata.aspect.validators; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import 
com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.validation.CreateIfNotExistsValidator; +import com.linkedin.metadata.aspect.validation.FieldPathValidator; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaFieldInfoArray; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class FieldPathValidatorTest { + + private static final AspectPluginConfig validatorConfig = + AspectPluginConfig.builder() + .supportedOperations( + Arrays.stream(ChangeType.values()) + .map(Objects::toString) + .collect(Collectors.toList())) + .className(CreateIfNotExistsValidator.class.getName()) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) + .enabled(true) + .build(); + private EntityRegistry entityRegistry; + private RetrieverContext mockRetrieverContext; + private static final DatasetUrn TEST_DATASET_URN; + private final FieldPathValidator test = new FieldPathValidator().setConfig(validatorConfig); + + 
static { + try { + TEST_DATASET_URN = + DatasetUrn.createFromUrn( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,test,PROD)")); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @BeforeTest + public void init() { + entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + GraphRetriever mockGraphRetriever = mock(GraphRetriever.class); + mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + } + + @Test + public void testValidateNonDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(false); + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(true); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 1); + } + + @Test + public void testValidateNonDuplicatedEditableSchemaFieldPath() { + final EditableSchemaMetadata schema = 
getMockEditableSchemaMetadataAspect(false); + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedEditableSchemaFieldPath() { + final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(true); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 1); + } + + @Test + public void testEmptySchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(false, ""); + TestMCP testItem = + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build(); + + Set exceptions = + test.validateProposed(Set.of(testItem), mockRetrieverContext).collect(Collectors.toSet()); + + assertEquals( + exceptions, + Set.of( + AspectValidationException.forItem( + testItem, "SchemaMetadata aspect has empty field path."))); + } + + private static SchemaMetadata getMockSchemaMetadataAspect(boolean duplicateFields) { + return getMockSchemaMetadataAspect(duplicateFields, null); + } + + private static SchemaMetadata 
getMockSchemaMetadataAspect( + boolean duplicateFields, @Nullable String fieldPath) { + List fields = new ArrayList<>(); + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath(fieldPath == null ? "test" : fieldPath)); + + if (duplicateFields) { + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath(fieldPath == null ? "test" : fieldPath)); + } + + return new SchemaMetadata() + .setPlatform(TEST_DATASET_URN.getPlatformEntity()) + .setFields(new SchemaFieldArray(fields)); + } + + private static EditableSchemaMetadata getMockEditableSchemaMetadataAspect( + boolean duplicateFields) { + + List fields = new ArrayList<>(); + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + + if (duplicateFields) { + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + } + + return new EditableSchemaMetadata() + .setEditableSchemaFieldInfo(new EditableSchemaFieldInfoArray(fields)); + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index e085a5876a42b..8961677b56878 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -319,6 +319,13 @@ public class Constants { public static final String EXECUTION_REQUEST_INPUT_ASPECT_NAME = "dataHubExecutionRequestInput"; public static final String EXECUTION_REQUEST_SIGNAL_ASPECT_NAME = "dataHubExecutionRequestSignal"; public static final String EXECUTION_REQUEST_RESULT_ASPECT_NAME = "dataHubExecutionRequestResult"; + public static final String EXECUTION_REQUEST_STATUS_RUNNING = "RUNNING"; + public static final String EXECUTION_REQUEST_STATUS_FAILURE = 
"FAILURE"; + public static final String EXECUTION_REQUEST_STATUS_SUCCESS = "SUCCESS"; + public static final String EXECUTION_REQUEST_STATUS_TIMEOUT = "TIMEOUT"; + public static final String EXECUTION_REQUEST_STATUS_CANCELLED = "CANCELLED"; + public static final String EXECUTION_REQUEST_STATUS_ABORTED = "ABORTED"; + public static final String EXECUTION_REQUEST_STATUS_DUPLICATE = "DUPLICATE"; // DataHub Access Token public static final String ACCESS_TOKEN_KEY_ASPECT_NAME = "dataHubAccessTokenKey"; diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index 98629ba030695..2b2f992249f1e 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -1,6 +1,7 @@ import datetime import logging -import uuid +import random +import string from typing import Any, Dict, List, Optional from pydantic import Field, validator @@ -71,6 +72,15 @@ class FlagsConfig(ConfigModel): ) +def _generate_run_id(source_type: Optional[str] = None) -> str: + current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + + if source_type is None: + source_type = "ingestion" + return f"{source_type}-{current_time}-{random_suffix}" + + class PipelineConfig(ConfigModel): source: SourceConfig sink: Optional[DynamicTypedConfig] = None @@ -91,12 +101,11 @@ def run_id_should_be_semantic( cls, v: Optional[str], values: Dict[str, Any], **kwargs: Any ) -> str: if v == DEFAULT_RUN_ID: + source_type = None if "source" in values and hasattr(values["source"], "type"): source_type = values["source"].type - current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") - return f"{source_type}-{current_time}" - return str(uuid.uuid1()) # default run_id if we cannot infer a source type + return _generate_run_id(source_type) else: assert v 
is not None return v diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 61b222f8d2dd5..155773f9898b4 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -27,6 +27,8 @@ r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]", r"root\[\d+\]\['aspect'\]\['json'\]\['created'\]", r"root\[\d+\]\['aspect'\]\['json'\]\['lastModified'\]", + r"root\[\d+\].*?\['systemMetadata'\]\['runId'\]", + r"root\[\d+\].*?\['systemMetadata'\]\['lastRunId'\]", ] @@ -82,6 +84,8 @@ def assert_metadata_files_equal( json_path = f"root[{i}]['aspect']['json'][{j}]['value']" ignore_paths = (*ignore_paths, re.escape(json_path)) + ignore_paths = (*ignore_paths, *default_exclude_paths) + diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: if isinstance(diff, MCPDiff) and diff.is_delta_valid: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java new file mode 100644 index 0000000000000..b77d3b48d5bd5 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java @@ -0,0 +1,70 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS; + +import com.linkedin.execution.ExecutionRequestResult; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import 
com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import java.util.Collection; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +/** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. */ +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class ExecutionRequestResultValidator extends AspectPayloadValidator { + private static final Set IMMUTABLE_STATUS = + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE); + + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return changeMCPs.stream() + .filter(item -> item.getPreviousRecordTemplate() != null) + .map( + item -> { + ExecutionRequestResult existingResult = + item.getPreviousAspect(ExecutionRequestResult.class); + + if (IMMUTABLE_STATUS.contains(existingResult.getStatus())) { + ExecutionRequestResult currentResult = item.getAspect(ExecutionRequestResult.class); + return AspectValidationException.forItem( + item, + String.format( + "Invalid update to immutable state for aspect dataHubExecutionRequestResult. Execution urn: %s previous status: %s. 
Denied status update: %s", + item.getUrn(), existingResult.getStatus(), currentResult.getStatus())); + } + + return null; + }) + .filter(Objects::nonNull); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java new file mode 100644 index 0000000000000..f46772ca7b350 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java @@ -0,0 +1,166 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_FAILURE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_RUNNING; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_TIMEOUT; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.execution.ExecutionRequestResult; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import 
com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.utils.AuditStampUtils; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.testng.annotations.Test; + +public class ExecutionRequestResultValidatorTest { + private static final OperationContext TEST_CONTEXT = + TestOperationContexts.systemContextNoSearchAuthorization(); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(ExecutionRequestResultValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("UPSERT")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(EXECUTION_REQUEST_ENTITY_NAME) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .build())) + .build(); + private static final Urn TEST_URN = UrnUtils.getUrn("urn:li:dataHubExecutionRequest:xyz"); + + @Test + public void testAllowed() { + ExecutionRequestResultValidator test = new ExecutionRequestResultValidator(); + test.setConfig(TEST_PLUGIN_CONFIG); + + Set allowedUpdateStates = + Set.of( + EXECUTION_REQUEST_STATUS_RUNNING, + EXECUTION_REQUEST_STATUS_FAILURE, + EXECUTION_REQUEST_STATUS_TIMEOUT); + Set destinationStates = new HashSet<>(allowedUpdateStates); + destinationStates.addAll( + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE)); + + List testItems = + new ArrayList<>( + // Tests with previous state + allowedUpdateStates.stream() + .flatMap( + prevState -> + destinationStates.stream() + .map( + destState -> { + SystemAspect prevData = mock(SystemAspect.class); + when(prevData.getRecordTemplate()) + .thenReturn( 
+ new ExecutionRequestResult().setStatus(prevState)); + return ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate( + new ExecutionRequestResult().setStatus(destState)) + .previousSystemAspect(prevData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever()); + })) + .toList()); + // Tests with no previous + testItems.addAll( + destinationStates.stream() + .map( + destState -> + ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate(new ExecutionRequestResult().setStatus(destState)) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever())) + .toList()); + + List result = + test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList(); + + assertTrue(result.isEmpty(), "Did not expect any validation errors."); + } + + @Test + public void testDenied() { + ExecutionRequestResultValidator test = new ExecutionRequestResultValidator(); + test.setConfig(TEST_PLUGIN_CONFIG); + + Set deniedUpdateStates = + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE); + Set destinationStates = new HashSet<>(deniedUpdateStates); + destinationStates.addAll( + Set.of( + EXECUTION_REQUEST_STATUS_RUNNING, + EXECUTION_REQUEST_STATUS_FAILURE, + EXECUTION_REQUEST_STATUS_TIMEOUT)); + + List testItems = + new ArrayList<>( + // Tests with previous state + deniedUpdateStates.stream() + .flatMap( + prevState -> + destinationStates.stream() + .map( + destState -> { + SystemAspect prevData = mock(SystemAspect.class); + when(prevData.getRecordTemplate()) + .thenReturn( + new ExecutionRequestResult().setStatus(prevState)); + return ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + 
.aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate( + new ExecutionRequestResult().setStatus(destState)) + .previousSystemAspect(prevData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever()); + })) + .toList()); + + List result = + test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList(); + + assertEquals( + result.size(), + deniedUpdateStates.size() * destinationStates.size(), + "Expected ALL items to be denied."); + } +} diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 9b692b51dc2b5..ec9c3fee1c404 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -673,6 +673,12 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: '*' + - className: 'com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator' + enabled: true + spring: + enabled: true + packageScan: + - com.linkedin.gms.factory.plugins mcpSideEffects: - className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect' packageScan: diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 4a2095685abe1..943b1c7184a60 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -1,5 +1,8 @@ package com.linkedin.gms.factory.plugins; +import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME; +import static 
com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME; import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; import com.linkedin.metadata.Constants; @@ -7,6 +10,9 @@ import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; +import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry; @@ -21,6 +27,7 @@ @Configuration @Slf4j public class SpringStandardPluginConfiguration { + private static final String ALL = "*"; @Value("${metadataChangeProposal.validation.ignoreUnknown}") private boolean ignoreUnknownEnabled; @@ -104,4 +111,43 @@ public MCPSideEffect dataProductUnsetSideEffect() { log.info("Initialized {}", SchemaFieldSideEffect.class.getName()); return new DataProductUnsetSideEffect().setConfig(config); } + + @Bean + public AspectPayloadValidator fieldPathValidator() { + return new FieldPathValidator() + .setConfig( + AspectPluginConfig.builder() + .className(FieldPathValidator.class.getName()) + .enabled(true) + .supportedOperations( + List.of("CREATE", "CREATE_ENTITY", "UPSERT", "UPDATE", "RESTATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(SCHEMA_METADATA_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(EDITABLE_SCHEMA_METADATA_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + public AspectPayloadValidator 
dataHubExecutionRequestResultValidator() { + return new ExecutionRequestResultValidator() + .setConfig( + AspectPluginConfig.builder() + .className(ExecutionRequestResultValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("UPSERT", "UPDATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(EXECUTION_REQUEST_ENTITY_NAME) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .build())) + .build()); + } } From 555f391c24fe245c96fdbdb7462ab80e9a87acaa Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:14:21 -0500 Subject: [PATCH 28/50] fix(ci): Update contributor-open-pr-comment.yml (#11631) --- .github/workflows/contributor-open-pr-comment.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/contributor-open-pr-comment.yml b/.github/workflows/contributor-open-pr-comment.yml index b3da16ca994ba..2f700290ee0f2 100644 --- a/.github/workflows/contributor-open-pr-comment.yml +++ b/.github/workflows/contributor-open-pr-comment.yml @@ -4,6 +4,9 @@ on: pull_request: types: [opened] +permissions: + pull-requests: write + jobs: post-pr-opened-comment: runs-on: ubuntu-latest From 14a22bfeaf8bedcb873da6dbcb0a40833fde96a3 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:14:46 -0500 Subject: [PATCH 29/50] fix(ci): add runtime limit (#11630) --- .github/workflows/metadata-ingestion.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index c718febca398a..92dcb3d8ac289 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -26,6 +26,7 @@ concurrency: jobs: metadata-ingestion: runs-on: ubuntu-latest + timeout-minutes: 40 env: SPARK_VERSION: 3.3.2 DATAHUB_TELEMETRY_ENABLED: false From 
2bc96e9e69f4523cd5eb27bf0b60e309694b7b17 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 15 Oct 2024 14:43:36 -0500 Subject: [PATCH 30/50] fix(ci): metadata-io req python (#11632) --- .github/workflows/metadata-io.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 7018b42949e89..5ee2223d71b03 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -57,17 +57,16 @@ jobs: - name: Disk Check run: df -h . && docker images - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - if: ${{ needs.setup.outputs.ingestion_change == 'true' }} - with: - python-version: "3.10" - cache: "pip" - name: Gradle build (and test) run: | ./gradlew :metadata-io:test From 2f2a7af58428563a3acb2963c8278445513f8e85 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 16 Oct 2024 15:49:59 +0900 Subject: [PATCH 31/50] feat: add quickstart post (#11623) --- .../examples/mce_files/bootstrap_mce.json | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index bc218e5e8c2d5..f0c4e7ff996ed 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3613,6 +3613,33 @@ }, "systemMetadata": null }, + { + "entityType": "post", + "entityUrn": "urn:li:post:f3a68539-f7e4-4c41-a4fd-9e57c085d8de", + "changeType": "UPSERT", + "aspectName": "postInfo", + "aspect": { + "json": { + "type": "HOME_PAGE_ANNOUNCEMENT", + "content": { + "title": "Join Metadata & 
AI Summit 2024", + "type": "LINK", + "link": "http://www.acryldata.io/conference?utm_source=datahub_quickstart&utm_medium=metadata_ai_2024&utm_campaign=pinned_announcement", + "media": { + "type": "IMAGE", + "location": "https://formulatedby.com/wp-content/uploads/2024/07/0193320a6d93e7508d1598f7b24662f75a87e92f-352x456-1.svg" + } + }, + "created": 1712547125049, + "lastModified": 1712547125049 + } + }, + "systemMetadata": { + "lastObserved": 1712548844816, + "runId": "datahub-2024_04_08-13_00_44", + "lastRunId": "no-run-id-provided" + } + }, { "entityType": "post", "entityUrn": "urn:li:post:f3a68539-f7e4-4c41-a4fd-9e57c085d8dd", From e76647dd7a094907bdfa22682b79b35965e5537c Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 16 Oct 2024 10:04:01 +0200 Subject: [PATCH 32/50] feat(ingest/bigquery): Generate platform resource entities for BigQuery labels (#11602) Co-authored-by: Shirshanka Das --- .../ingestion/source/bigquery_v2/bigquery.py | 1 + .../bigquery_platform_resource_helper.py | 144 +++++++++++++ .../source/bigquery_v2/bigquery_schema_gen.py | 114 +++++++++-- .../bigquery_v2/bigquery_mcp_golden.json | 192 ++++++++++++++++++ .../integration/bigquery_v2/test_bigquery.py | 41 +++- .../tests/unit/test_bigquery_source.py | 74 ++++--- 6 files changed, 517 insertions(+), 49 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 4cc3ec50bacd4..c30dade921d25 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -188,6 +188,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.sql_parser_schema_resolver, self.profiler, self.identifiers, + self.ctx.graph, ) self.add_config_to_report() diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py new file mode 100644 index 0000000000000..d2da895be985d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py @@ -0,0 +1,144 @@ +import logging +from dataclasses import dataclass +from typing import Optional + +import cachetools +from pydantic import BaseModel, ValidationError + +from datahub.api.entities.platformresource.platform_resource import ( + PlatformResource, + PlatformResourceKey, +) +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.urns import TagUrn + +logger: logging.Logger = logging.getLogger(__name__) + + +@dataclass +class BigQueryLabel: + key: str + value: Optional[str] + + def primary_key(self) -> str: + return f"{self.key}/{self.value}" if self.value else f"{self.key}" + + +class BigQueryLabelInfo(BaseModel): + datahub_urn: str + managed_by_datahub: bool + key: str + value: str + + +@dataclass() +class BigQueryLabelPlatformResource: + datahub_urn: str + project: Optional[str] + managed_by_datahub: bool + label: BigQueryLabel + + def platform_resource_key(self) -> PlatformResourceKey: + return PlatformResourceKey( + platform="bigquery", + resource_type="BigQueryLabelInfo", + platform_instance=self.project, + primary_key=self.label.primary_key(), + ) + + def platform_resource_info(self) -> BigQueryLabelInfo: + bq_label_info = BigQueryLabelInfo( + datahub_urn=self.datahub_urn, + managed_by_datahub=self.managed_by_datahub, + key=self.label.key, + value=self.label.value, + ) + return bq_label_info + + def platform_resource(self) -> PlatformResource: + return PlatformResource.create( + key=self.platform_resource_key(), + secondary_keys=[self.datahub_urn], + value=self.platform_resource_info(), + ) + + +class BigQueryPlatformResourceHelper: + def __init__( + self, + 
bq_project: Optional[str], + graph: Optional[DataHubGraph], + ): + self.bq_project = bq_project + self.graph = graph + + platform_resource_cache: cachetools.LRUCache = cachetools.LRUCache(maxsize=500) + + def get_platform_resource( + self, platform_resource_key: PlatformResourceKey + ) -> Optional[PlatformResource]: + # if graph is not available we always create a new PlatformResource + if not self.graph: + return None + if self.platform_resource_cache.get(platform_resource_key.primary_key): + return self.platform_resource_cache.get(platform_resource_key.primary_key) + + platform_resource = PlatformResource.from_datahub( + key=platform_resource_key, graph_client=self.graph + ) + if platform_resource: + self.platform_resource_cache[ + platform_resource_key.primary_key + ] = platform_resource + return platform_resource + return None + + def generate_label_platform_resource( + self, + bigquery_label: BigQueryLabel, + tag_urn: TagUrn, + managed_by_datahub: bool = True, + ) -> PlatformResource: + new_platform_resource = BigQueryLabelPlatformResource( + datahub_urn=tag_urn.urn(), + project=self.bq_project, + managed_by_datahub=managed_by_datahub, + label=bigquery_label, + ) + + platform_resource = self.get_platform_resource( + new_platform_resource.platform_resource_key() + ) + if platform_resource: + if ( + platform_resource.resource_info + and platform_resource.resource_info.value + ): + try: + existing_info: Optional[BigQueryLabelInfo] = platform_resource.resource_info.value.as_pydantic_object(BigQueryLabelInfo) # type: ignore + except ValidationError as e: + logger.error( + f"Error converting existing value to BigQueryLabelInfo: {e}. Creating new one. Maybe this is because of a non backward compatible schema change." 
+ ) + existing_info = None + + if existing_info: + if ( + new_platform_resource.platform_resource_info() == existing_info + or existing_info.managed_by_datahub + ): + return platform_resource + else: + raise ValueError( + f"Datahub URN mismatch for platform resources. Old (existing) platform resource: {platform_resource} and new platform resource: {new_platform_resource}" + ) + + logger.info(f"Created platform resource {new_platform_resource}") + + self.platform_resource_cache.update( + { + new_platform_resource.platform_resource_key().primary_key: new_platform_resource.platform_resource() + } + ) + + return new_platform_resource.platform_resource() diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 11d06771d4e4f..1235f638f68ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -6,6 +6,7 @@ from google.cloud.bigquery.table import TableListItem +from datahub.api.entities.platformresource.platform_resource import PlatformResource from datahub.configuration.pattern_utils import is_schema_allowed, is_tag_allowed from datahub.emitter.mce_builder import make_tag_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -16,6 +17,7 @@ ClassificationHandler, classification_workunit_processor, ) +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( BigqueryTableIdentifier, BigQueryTableRef, @@ -25,6 +27,11 @@ from datahub.ingestion.source.bigquery_v2.bigquery_helper import ( unquote_and_decode_unicode_escape_seq, ) +from datahub.ingestion.source.bigquery_v2.bigquery_platform_resource_helper import ( + BigQueryLabel, + BigQueryLabelInfo, + BigQueryPlatformResourceHelper, +) from 
datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( BigqueryColumn, @@ -84,6 +91,7 @@ GlobalTagsClass, TagAssociationClass, ) +from datahub.metadata.urns import TagUrn from datahub.sql_parsing.schema_resolver import SchemaResolver from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.hive_schema_to_avro import ( @@ -160,6 +168,7 @@ def __init__( sql_parser_schema_resolver: SchemaResolver, profiler: BigqueryProfiler, identifiers: BigQueryIdentifierBuilder, + graph: Optional[DataHubGraph] = None, ): self.config = config self.report = report @@ -168,6 +177,7 @@ def __init__( self.sql_parser_schema_resolver = sql_parser_schema_resolver self.profiler = profiler self.identifiers = identifiers + self.graph = graph self.classification_handler = ClassificationHandler(self.config, self.report) self.data_reader: Optional[BigQueryDataReader] = None @@ -188,6 +198,21 @@ def __init__( # Maps snapshot ref -> Snapshot self.snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot] = FileBackedDict() + bq_project = ( + self.config.project_on_behalf + if self.config.project_on_behalf + else self.config.credential.project_id + if self.config.credential + else None + ) + + self.platform_resource_helper: BigQueryPlatformResourceHelper = ( + BigQueryPlatformResourceHelper( + bq_project, + self.graph, + ) + ) + @property def store_table_refs(self): return ( @@ -264,13 +289,28 @@ def gen_dataset_containers( ) -> Iterable[MetadataWorkUnit]: schema_container_key = self.gen_dataset_key(project_id, dataset) - tags_joined: Optional[List[str]] = None + tags_joined: List[str] = [] if tags and self.config.capture_dataset_label_as_tag: - tags_joined = [ - self.make_tag_from_label(k, v) - for k, v in tags.items() - if is_tag_allowed(self.config.capture_dataset_label_as_tag, k) - ] + for k, v in tags.items(): + if is_tag_allowed(self.config.capture_dataset_label_as_tag, k): 
+ tag_urn = TagUrn.from_string(self.make_tag_urn_from_label(k, v)) + label = BigQueryLabel(key=k, value=v) + try: + platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( + label, tag_urn, managed_by_datahub=False + ) + label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) + tag_urn = TagUrn.from_string(label_info.datahub_urn) + + for mcpw in platform_resource.to_mcps(): + yield mcpw.as_workunit() + except ValueError as e: + logger.warning( + f"Failed to generate platform resource for label {k}:{v}: {e}" + ) + tags_joined.append(tag_urn.urn()) database_container_key = self.gen_project_id_key(database=project_id) @@ -676,10 +716,11 @@ def _process_snapshot( dataset_name=dataset_name, ) - def make_tag_from_label(self, key: str, value: str) -> str: - if not value.startswith(ENCODED_TAG_PREFIX): + def make_tag_urn_from_label(self, key: str, value: str) -> str: + if value: return make_tag_urn(f"""{key}:{value}""") - return self.modified_base32decode(value) + else: + return make_tag_urn(key) def gen_table_dataset_workunits( self, @@ -724,13 +765,26 @@ def gen_table_dataset_workunits( tags_to_add = None if table.labels and self.config.capture_table_label_as_tag: tags_to_add = [] - tags_to_add.extend( - [ - self.make_tag_from_label(k, v) - for k, v in table.labels.items() - if is_tag_allowed(self.config.capture_table_label_as_tag, k) - ] - ) + for k, v in table.labels.items(): + if is_tag_allowed(self.config.capture_table_label_as_tag, k): + tag_urn = TagUrn.from_string(self.make_tag_urn_from_label(k, v)) + try: + label = BigQueryLabel(key=k, value=v) + platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( + label, tag_urn, managed_by_datahub=False + ) + label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) + tag_urn = 
TagUrn.from_string(label_info.datahub_urn) + + for mcpw in platform_resource.to_mcps(): + yield mcpw.as_workunit() + except ValueError as e: + logger.warning( + f"Failed to generate platform resource for label {k}:{v}: {e}" + ) + tags_to_add.append(tag_urn.urn()) yield from self.gen_dataset_workunits( table=table, @@ -749,13 +803,29 @@ def gen_view_dataset_workunits( project_id: str, dataset_name: str, ) -> Iterable[MetadataWorkUnit]: - tags_to_add = None + tags_to_add = [] if table.labels and self.config.capture_view_label_as_tag: - tags_to_add = [ - self.make_tag_from_label(k, v) - for k, v in table.labels.items() - if is_tag_allowed(self.config.capture_view_label_as_tag, k) - ] + for k, v in table.labels.items(): + if is_tag_allowed(self.config.capture_view_label_as_tag, k): + tag_urn = TagUrn.from_string(self.make_tag_urn_from_label(k, v)) + try: + label = BigQueryLabel(key=k, value=v) + platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( + label, tag_urn, managed_by_datahub=False + ) + label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) + tag_urn = TagUrn.from_string(label_info.datahub_urn) + + for mcpw in platform_resource.to_mcps(): + yield mcpw.as_workunit() + except ValueError as e: + logger.warning( + f"Failed to generate platform resource for label {k}:{v}: {e}" + ) + + tags_to_add.append(tag_urn.urn()) yield from self.gen_dataset_workunits( table=table, columns=columns, diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json index 02660f0fae08e..b268926f155b7 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json @@ -199,6 +199,49 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + 
"entityUrn": "urn:li:platformResource:79d443a7956814fdab2168e11392bbf2", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "BigQueryLabelInfo", + "primaryKey": "priority/high", + "secondaryKeys": [ + "urn:li:tag:priority:high" + ], + "value": { + "blob": "{\"datahub_urn\": \"urn:li:tag:priority:high\", \"managed_by_datahub\": false, \"key\": \"priority\", \"value\": \"high\"}", + "contentType": "JSON", + "schemaType": "JSON", + "schemaRef": "BigQueryLabelInfo" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:79d443a7956814fdab2168e11392bbf2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", @@ -215,6 +258,49 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:0a8c87e84bd90486c4fd57bbae6557e3", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "BigQueryLabelInfo", + "primaryKey": "purchase", + "secondaryKeys": [ + "urn:li:tag:purchase" + ], + "value": { + "blob": "{\"datahub_urn\": \"urn:li:tag:purchase\", \"managed_by_datahub\": false, \"key\": \"purchase\", \"value\": \"\"}", + "contentType": "JSON", + "schemaType": "JSON", + "schemaRef": "BigQueryLabelInfo" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": 
"bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:0a8c87e84bd90486c4fd57bbae6557e3", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", @@ -309,6 +395,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:79d443a7956814fdab2168e11392bbf2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:0a8c87e84bd90486c4fd57bbae6557e3", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", @@ -330,6 +448,45 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7da6409504c5c6444b4ce60b0239b759", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "BigQueryLabelInfo", + "primaryKey": "mixedcasetag", + "value": { + "blob": "{\"datahub_urn\": 
\"urn:li:tag:MixedCaseTag\", \"managed_by_datahub\": true, \"key\": \"mixedcasetag\", \"value\": \"\"}", + "contentType": "JSON", + "schemaType": "JSON", + "schemaRef": "BigQueryLabelInfo" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7da6409504c5c6444b4ce60b0239b759", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", @@ -343,6 +500,9 @@ }, { "tag": "urn:li:tag:purchase" + }, + { + "tag": "urn:li:tag:MixedCaseTag" } ] } @@ -353,6 +513,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7da6409504c5c6444b4ce60b0239b759", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", @@ -1082,5 +1258,21 @@ "runId": "bigquery-2022_02_03-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:MixedCaseTag", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "MixedCaseTag" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff 
--git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 0ac4e94a5a24f..39cefcb42f360 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -1,12 +1,16 @@ import random import string from datetime import datetime, timezone -from typing import Any, Dict +from typing import Any, Dict, Optional from unittest.mock import MagicMock, patch from freezegun import freeze_time from google.cloud.bigquery.table import TableListItem +from datahub.api.entities.platformresource.platform_resource import ( + PlatformResource, + PlatformResourceKey, +) from datahub.ingestion.glossary.classifier import ( ClassificationConfig, DynamicTypedClassifierConfig, @@ -14,6 +18,10 @@ from datahub.ingestion.glossary.datahub_classifier import DataHubClassifierConfig from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_data_reader import BigQueryDataReader +from datahub.ingestion.source.bigquery_v2.bigquery_platform_resource_helper import ( + BigQueryLabelInfo, + BigQueryPlatformResourceHelper, +) from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( BigqueryColumn, BigqueryDataset, @@ -51,6 +59,13 @@ def recipe(mcp_output_path: str, source_config_override: dict = {}) -> dict: "type": "bigquery", "config": { "project_ids": ["project-id-1"], + "credential": { + "project_id": "project-id-1", + "private_key_id": "private_key_id", + "private_key": "private_key", + "client_email": "client_email", + "client_id": "client_id", + }, "include_usage_statistics": False, "include_table_lineage": True, "include_data_platform_instance": True, @@ -82,6 +97,7 @@ def recipe(mcp_output_path: str, source_config_override: dict = {}) -> dict: @patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") 
@patch.object(BigQuerySchemaApi, "get_columns_for_dataset") @patch.object(BigQueryDataReader, "get_sample_data_for_table") +@patch.object(BigQueryPlatformResourceHelper, "get_platform_resource") @patch("google.cloud.bigquery.Client") @patch("google.cloud.datacatalog_v1.PolicyTagManagerClient") @patch("google.cloud.resourcemanager_v3.ProjectsClient") @@ -89,6 +105,7 @@ def test_bigquery_v2_ingest( client, policy_tag_manager_client, projects_client, + get_platform_resource, get_sample_data_for_table, get_columns_for_dataset, get_datasets_for_project_id, @@ -104,6 +121,25 @@ def test_bigquery_v2_ingest( mcp_output_path = "{}/{}".format(tmp_path, "bigquery_mcp_output.json") dataset_name = "bigquery-dataset-1" + + def side_effect(*args: Any) -> Optional[PlatformResource]: + if args[0].primary_key == "mixedcasetag": + return PlatformResource.create( + key=PlatformResourceKey( + primary_key="mixedcasetag", + resource_type="BigQueryLabelInfo", + platform="bigquery", + ), + value=BigQueryLabelInfo( + datahub_urn="urn:li:tag:MixedCaseTag", + managed_by_datahub=True, + key="mixedcasetag", + value="", + ), + ) + return None + + get_platform_resource.side_effect = side_effect get_datasets_for_project_id.return_value = [ BigqueryDataset(name=dataset_name, location="US") ] @@ -158,7 +194,8 @@ def test_bigquery_v2_ingest( rows_count=None, labels={ "priority": "high", - "purchase": "urn_li_encoded_tag_ovzg4otmne5hiylhhjyhk4tdnbqxgzi_", + "purchase": "", + "mixedcasetag": "", }, ) get_tables_for_dataset.return_value = iter([bigquery_table]) diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 38239d150dd6b..b605e9b3f8a3e 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -489,30 +489,45 @@ def test_gen_table_dataset_workunits( gen = schema_gen.gen_table_dataset_workunits( bigquery_table, [], project_id, dataset_name ) - mcp = 
cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert mcp.aspect == StatusClass(removed=False) + mcps = list(gen) + + # Helper function to find MCP by aspect type + def find_mcp_by_aspect(aspect_type): + return next( + mcp # type: ignore + for mcp in mcps + if isinstance(mcp.metadata.aspect, aspect_type) # type: ignore + ) - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, SchemaMetadataClass) - assert mcp.aspect.schemaName == f"{project_id}.{dataset_name}.{bigquery_table.name}" - assert mcp.aspect.fields == [] + # Assert StatusClass + status_mcp = find_mcp_by_aspect(StatusClass) + assert status_mcp.metadata.aspect.removed is False - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, DatasetPropertiesClass) - assert mcp.aspect.name == bigquery_table.name + # Assert SchemaMetadataClass + schema_mcp = find_mcp_by_aspect(SchemaMetadataClass) + assert ( + schema_mcp.metadata.aspect.schemaName + == f"{project_id}.{dataset_name}.{bigquery_table.name}" + ) + assert schema_mcp.metadata.aspect.fields == [] + + # Assert DatasetPropertiesClass + dataset_props_mcp = find_mcp_by_aspect(DatasetPropertiesClass) + assert dataset_props_mcp.metadata.aspect.name == bigquery_table.name assert ( - mcp.aspect.qualifiedName == f"{project_id}.{dataset_name}.{bigquery_table.name}" + dataset_props_mcp.metadata.aspect.qualifiedName + == f"{project_id}.{dataset_name}.{bigquery_table.name}" ) - assert mcp.aspect.description == bigquery_table.comment - assert mcp.aspect.created == TimeStampClass( + assert dataset_props_mcp.metadata.aspect.description == bigquery_table.comment + assert dataset_props_mcp.metadata.aspect.created == TimeStampClass( time=int(bigquery_table.created.timestamp() * 1000) ) - assert mcp.aspect.lastModified == TimeStampClass( + assert dataset_props_mcp.metadata.aspect.lastModified == TimeStampClass( time=int(bigquery_table.last_altered.timestamp() * 1000) ) - 
assert mcp.aspect.tags == [] + assert dataset_props_mcp.metadata.aspect.tags == [] - assert mcp.aspect.customProperties == { + expected_custom_properties = { "expiration_date": str(bigquery_table.expires), "size_in_bytes": str(bigquery_table.size_in_bytes), "billable_bytes_active": str(bigquery_table.active_billable_bytes), @@ -523,24 +538,33 @@ def test_gen_table_dataset_workunits( "max_shard_id": str(bigquery_table.max_shard_id), "is_sharded": "True", } + assert ( + dataset_props_mcp.metadata.aspect.customProperties == expected_custom_properties + ) - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, GlobalTagsClass) - assert mcp.aspect.tags == [ + # Assert GlobalTagsClass + global_tags_mcp = find_mcp_by_aspect(GlobalTagsClass) + assert global_tags_mcp.metadata.aspect.tags == [ TagAssociationClass( "urn:li:tag:data_producer_owner_email:games_team-nytimes_com" ) ] - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, ContainerClass) + # Assert ContainerClass + container_mcp = find_mcp_by_aspect(ContainerClass) + assert container_mcp is not None - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, DataPlatformInstanceClass) + # Assert DataPlatformInstanceClass + data_platform_instance_mcp = find_mcp_by_aspect(DataPlatformInstanceClass) + assert data_platform_instance_mcp is not None - mcp = cast(MetadataChangeProposalClass, next(iter(gen)).metadata) - assert isinstance(mcp.aspect, SubTypesClass) - assert mcp.aspect.typeNames[1] == DatasetSubTypes.TABLE + # Assert SubTypesClass + sub_types_mcp = find_mcp_by_aspect(SubTypesClass) + assert sub_types_mcp.metadata.aspect.typeNames[1] == DatasetSubTypes.TABLE + + # Ensure all MCPs were checked + # TODO: Test for PlatformResource MCPs as well + assert len(mcps) >= 7 @patch.object(BigQueryV2Config, "get_bigquery_client") From 58f5b4a0cb6bff1f1f0f80d09d980d5a0f438d5f Mon Sep 17 
00:00:00 2001 From: Harshal Sheth Date: Wed, 16 Oct 2024 04:53:01 -0700 Subject: [PATCH 33/50] fix(ingest): add preset deps (#11637) --- metadata-ingestion/setup.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f14c080df644a..bfec2c00cb864 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -321,6 +321,13 @@ "Authlib", } +superset_common = { + "requests", + "sqlalchemy", + "great_expectations", + "greenlet", +} + # Note: for all of these, framework_common will be added. plugins: Dict[str, Set[str]] = { # Sink plugins. @@ -462,12 +469,8 @@ "sqlalchemy": sql_common, "sql-queries": usage_common | sqlglot_lib, "slack": slack, - "superset": { - "requests", - "sqlalchemy", - "great_expectations", - "greenlet", - }, + "superset": superset_common, + "preset": superset_common, # FIXME: I don't think tableau uses sqllineage anymore so we should be able # to remove that dependency. 
"tableau": {"tableauserverclient>=0.24.0"} | sqllineage_lib | sqlglot_lib, From 06698308df21f34a752f7bd18ac8b3d3f8b104e6 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:22:59 -0500 Subject: [PATCH 34/50] feat(docker-profiles): allow version override for quickstartDebug (#11643) --- docker/build.gradle | 3 +++ docker/profiles/docker-compose.frontend.yml | 2 +- docker/profiles/docker-compose.gms.yml | 8 ++++---- docker/profiles/docker-compose.prerequisites.yml | 10 +++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 47f52079e67e0..cdf2d1271d6e8 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -128,6 +128,9 @@ dockerCompose { isRequiredBy(tasks.named('quickstartDebug')) composeAdditionalArgs = ['--profile', 'debug'] + if (System.getenv().containsKey("DATAHUB_VERSION")) { + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") + } environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' // disabled when built locally useComposeFiles = ['profiles/docker-compose.yml'] diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index b5b2d50143927..c6b15a7016670 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -16,7 +16,7 @@ x-datahub-frontend-service: &datahub-frontend-service x-datahub-frontend-service-dev: &datahub-frontend-service-dev <<: *datahub-frontend-service - image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:debug + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:${DATAHUB_VERSION:-debug} ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 6e3e5780506ac..2683734c2d5e5 100644 --- 
a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:debug + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -115,7 +115,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:debug + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -159,7 +159,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:debug + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] volumes: @@ -185,7 +185,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:debug + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] volumes: diff --git a/docker/profiles/docker-compose.prerequisites.yml 
b/docker/profiles/docker-compose.prerequisites.yml index eed23a749628f..c9cb444d57c98 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -135,7 +135,7 @@ services: mysql-setup-dev: <<: *mysql-setup profiles: *mysql-profiles-dev - image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:debug + image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:${DATAHUB_VERSION:-debug} postgres: profiles: *postgres-profiles hostname: postgres @@ -166,7 +166,7 @@ services: postgres-setup-dev: <<: *postgres-setup profiles: *postgres-profiles-dev - image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:debug + image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:${DATAHUB_VERSION:-debug} cassandra: profiles: *cassandra-profiles hostname: cassandra @@ -272,7 +272,7 @@ services: environment: <<: *kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true} - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:debug + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:${DATAHUB_VERSION:-debug} elasticsearch: profiles: *elasticsearch-profiles hostname: search @@ -296,7 +296,7 @@ services: volumes: - esdata:/usr/share/elasticsearch/data elasticsearch-setup-dev: &elasticsearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-debug} profiles: *elasticsearch-profiles hostname: elasticsearch-setup env_file: elasticsearch-setup/env/docker.env @@ -347,7 +347,7 @@ services: <<: *opensearch-setup profiles: *opensearch-profiles-dev hostname: opensearch-setup-dev - image: 
${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-debug} environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} From ce909c8b8b2509e696f71939d9354dc58d5ffd0d Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Wed, 16 Oct 2024 20:58:05 +0530 Subject: [PATCH 35/50] feat(ingest/powerbi): link cross workspace dataset into assets (#11560) --- docs/how/updating-datahub.md | 15 + .../ingestion/source/powerbi/config.py | 11 +- .../powerbi/rest_api_wrapper/data_classes.py | 11 +- .../powerbi/rest_api_wrapper/data_resolver.py | 33 +- .../powerbi/rest_api_wrapper/powerbi_api.py | 69 +- .../integration/powerbi/golden_test_cll.json | 1114 ++++++++--------- .../golden_test_cross_workspace_dataset.json | 668 ++++++++++ .../golden_test_disabled_ownership.json | 720 +++++------ .../powerbi/golden_test_endorsement.json | 846 ++++++------- .../powerbi/golden_test_ingest.json | 722 +++++------ .../golden_test_ingest_patch_disabled.json | 715 +++++------ .../powerbi/golden_test_lineage.json | 1014 +++++++-------- .../golden_test_lower_case_urn_ingest.json | 766 ++++++------ ..._config_and_modified_since_admin_only.json | 624 ++++----- .../golden_test_platform_instance_ingest.json | 702 +++++------ .../powerbi/golden_test_profiling.json | 68 +- .../golden_test_scan_all_workspaces.json | 818 ++++++------ ...lden_test_server_to_platform_instance.json | 968 +++++++------- .../cross_workspace_mock_response.json | 220 ++++ .../mock_data/default_mock_response.json | 558 +++++++++ .../mock_data/workspace_type_filter.json | 76 ++ .../tests/integration/powerbi/test_powerbi.py | 797 ++---------- 22 files changed, 6305 insertions(+), 5230 deletions(-) create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_cross_workspace_dataset.json 
create mode 100644 metadata-ingestion/tests/integration/powerbi/mock_data/cross_workspace_mock_response.json create mode 100644 metadata-ingestion/tests/integration/powerbi/mock_data/default_mock_response.json create mode 100644 metadata-ingestion/tests/integration/powerbi/mock_data/workspace_type_filter.json diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index dbcc7da846703..5b4769ed30e3e 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -18,6 +18,21 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next +- #11560 - The PowerBI ingestion source configuration option include_workspace_name_in_dataset_urn determines whether the workspace name is included in the PowerBI dataset's URN.
PowerBI allows semantic models and their tables to have identical names across workspaces, so, without the workspace name in the URN, a multi-workspace ingestion can overwrite one semantic model with another.
+ Entity urn with `include_workspace_name_in_dataset_urn: false` + ``` + urn:li:dataset:(urn:li:dataPlatform:powerbi,[.].,) + ``` + + Entity urn with `include_workspace_name_in_dataset_urn: true` + ``` + urn:li:dataset:(urn:li:dataPlatform:powerbi,[.]...,) + ``` + + The config `include_workspace_name_in_dataset_urn` is default to `false` for backward compatiblity, However, we recommend enabling this flag after performing the necessary cleanup. + If stateful ingestion is enabled, running ingestion with the latest CLI version will handle the cleanup automatically. Otherwise, we recommend soft deleting all powerbi data via the DataHub CLI: + `datahub delete --platform powerbi --soft` and then re-ingest with the latest CLI version, ensuring the `include_workspace_name_in_dataset_urn` configuration is set to true. + ### Breaking Changes - #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 522639a160781..0716a658b61c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -331,8 +331,8 @@ class PowerBiDashboardSourceConfig( ) workspace_id_as_urn_part: bool = pydantic.Field( default=False, - description="Highly recommend changing this to True, as you can have the same workspace name" - "To maintain backward compatability, this is set to False which uses workspace name", + description="It is recommended to set this to True only if you have legacy workspaces based on Office 365 groups, as those workspaces can have identical names." 
+ "To maintain backward compatibility, this is set to False which uses workspace name", ) # Enable/Disable extracting ownership information of Dashboard extract_ownership: bool = pydantic.Field( @@ -466,6 +466,13 @@ class PowerBiDashboardSourceConfig( " Note: This field works in conjunction with 'workspace_id_pattern'. Both must be matched for a workspace to be processed.", ) + include_workspace_name_in_dataset_urn: bool = pydantic.Field( + default=False, + description="It is recommended to set this to true, as it helps prevent the overwriting of datasets." + "Read section #11560 at https://datahubproject.io/docs/how/updating-datahub/ before enabling this option." + "To maintain backward compatibility, this is set to False.", + ) + @root_validator(skip_on_failure=True) def validate_extract_column_level_lineage(cls, values: Dict) -> Dict: flags = [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index fb0959ac604c4..d54b4a42b742e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -142,6 +142,7 @@ class PowerBIDataset: description: str webUrl: Optional[str] workspace_id: str + workspace_name: str parameters: Dict[str, str] # Table in datasets @@ -225,7 +226,10 @@ class Report: webUrl: Optional[str] embedUrl: str description: str - dataset: Optional["PowerBIDataset"] + dataset_id: Optional[str] # dataset_id is coming from REST API response + dataset: Optional[ + "PowerBIDataset" + ] # This the dataclass later initialise by powerbi_api.py pages: List["Page"] users: List["User"] tags: List[str] @@ -283,7 +287,7 @@ def __hash__(self): return hash(self.__members()) -def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset: +def new_powerbi_dataset(workspace: 
Workspace, raw_instance: dict) -> PowerBIDataset: return PowerBIDataset( id=raw_instance["id"], name=raw_instance.get("name"), @@ -293,7 +297,8 @@ def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset if raw_instance.get("webUrl") is not None else None ), - workspace_id=workspace_id, + workspace_id=workspace.id, + workspace_name=workspace.name, parameters={}, tables=[], tags=[], diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index d89b9662d12ed..8849e19ea8622 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -129,7 +129,7 @@ def profile_dataset( @abstractmethod def get_dataset( - self, workspace_id: str, dataset_id: str + self, workspace: Workspace, dataset_id: str ) -> Optional[PowerBIDataset]: pass @@ -279,9 +279,10 @@ def fetch_reports(): pages=self._get_pages_by_report( workspace=workspace, report_id=raw_instance[Constant.ID] ), + dataset_id=raw_instance.get(Constant.DATASET_ID), users=[], # It will be fetched using Admin Fetcher based on condition tags=[], # It will be fetched using Admin Fetcher based on condition - dataset=workspace.datasets.get(raw_instance.get(Constant.DATASET_ID)), + dataset=None, # It will come from dataset_registry defined in powerbi_api.py ) for raw_instance in fetch_reports() if Constant.APP_ID @@ -317,11 +318,6 @@ def new_dataset_or_report(tile_instance: Any) -> dict: Find out which is the data source for tile. 
It is either REPORT or DATASET """ report_fields = { - Constant.DATASET: ( - workspace.datasets.get(tile_instance.get(Constant.DATASET_ID)) - if tile_instance.get("datasetId") is not None - else None - ), Constant.REPORT: ( self.get_report( workspace=workspace, @@ -372,6 +368,7 @@ def new_dataset_or_report(tile_instance: Any) -> dict: title=instance.get(Constant.TITLE), embedUrl=instance.get(Constant.EMBED_URL), dataset_id=instance.get(Constant.DATASET_ID), + dataset=None, **new_dataset_or_report(instance), ) for instance in tile_dict @@ -430,14 +427,14 @@ class RegularAPIResolver(DataResolverBase): } def get_dataset( - self, workspace_id: str, dataset_id: str + self, workspace: Workspace, dataset_id: str ) -> Optional[PowerBIDataset]: """ Fetch the dataset from PowerBi for the given dataset identifier """ - if workspace_id is None or dataset_id is None: + if workspace.id is None or dataset_id is None: logger.debug("Input values are None") - logger.debug(f"{Constant.WorkspaceId}={workspace_id}") + logger.debug(f"{Constant.WorkspaceId}={workspace.id}") logger.debug(f"{Constant.DatasetId}={dataset_id}") return None @@ -447,7 +444,7 @@ def get_dataset( # Replace place holders dataset_get_endpoint = dataset_get_endpoint.format( POWERBI_BASE_URL=DataResolverBase.BASE_URL, - WORKSPACE_ID=workspace_id, + WORKSPACE_ID=workspace.id, DATASET_ID=dataset_id, ) # Hit PowerBi @@ -456,13 +453,13 @@ def get_dataset( dataset_get_endpoint, headers=self.get_authorization_header(), ) - # Check if we got response from PowerBi + # Check if we got a response from PowerBi response.raise_for_status() response_dict = response.json() logger.debug(f"datasets = {response_dict}") - # PowerBi Always return the webURL, in-case if it is None then setting complete webURL to None instead of + # PowerBi Always return the webURL, in-case if it is None, then setting complete webURL to None instead of # None/details - return new_powerbi_dataset(workspace_id, response_dict) + return 
new_powerbi_dataset(workspace, response_dict) def get_dataset_parameters( self, workspace_id: str, dataset_id: str @@ -910,11 +907,11 @@ def get_tiles_endpoint(self, workspace: Workspace, dashboard_id: str) -> str: ) def get_dataset( - self, workspace_id: str, dataset_id: str + self, workspace: Workspace, dataset_id: str ) -> Optional[PowerBIDataset]: datasets_endpoint = self.API_ENDPOINTS[Constant.DATASET_LIST].format( POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL, - WORKSPACE_ID=workspace_id, + WORKSPACE_ID=workspace.id, ) # Hit PowerBi logger.debug(f"Request to datasets URL={datasets_endpoint}") @@ -930,13 +927,13 @@ def get_dataset( if len(response_dict.get(Constant.VALUE, [])) == 0: logger.warning( "Dataset not found. workspace_id = %s, dataset_id = %s", - workspace_id, + workspace.id, dataset_id, ) return None raw_instance: dict = response_dict[Constant.VALUE][0] - return new_powerbi_dataset(workspace_id, raw_instance) + return new_powerbi_dataset(workspace, raw_instance) def _get_pages_by_report(self, workspace: Workspace, report_id: str) -> List[Page]: return [] # Report pages are not available in Admin API diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py index 25e97b158d48b..37793bc32980b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py @@ -31,6 +31,28 @@ logger = logging.getLogger(__name__) +def form_full_table_name( + config: PowerBiDashboardSourceConfig, + workspace: Workspace, + dataset_name: str, + table_name: str, +) -> str: + + full_table_name: str = "{}.{}".format( + dataset_name.replace(" ", "_"), table_name.replace(" ", "_") + ) + + if config.include_workspace_name_in_dataset_urn: + workspace_identifier: str = ( + workspace.id + if 
config.workspace_id_as_urn_part + else workspace.name.replace(" ", "_").lower() + ) + full_table_name = f"{workspace_identifier}.{full_table_name}" + + return full_table_name + + class PowerBiAPI: def __init__( self, @@ -52,6 +74,14 @@ def __init__( tenant_id=self.__config.tenant_id, ) + self.reporter: PowerBiDashboardSourceReport = reporter + + # A report or tile in one workspace can be built using a dataset from another workspace. + # We need to store the dataset ID (which is a UUID) mapped to its dataset instance. + # This mapping will allow us to retrieve the appropriate dataset for + # reports and tiles across different workspaces. + self.dataset_registry: Dict[str, PowerBIDataset] = {} + def log_http_error(self, message: str) -> Any: logger.warning(message) _, e, _ = sys.exc_info() @@ -158,6 +188,16 @@ def get_reports(self, workspace: Workspace) -> List[Report]: reports: List[Report] = [] try: reports = self._get_resolver().get_reports(workspace) + # Fill Report dataset + for report in reports: + if report.dataset_id: + report.dataset = self.dataset_registry.get(report.dataset_id) + if report.dataset is None: + self.reporter.info( + title="Missing Lineage For Report", + message="A cross-workspace reference that failed to be resolved. 
Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.", + context=f"report-name: {report.name} and dataset-id: {report.dataset_id}", + ) except: self.log_http_error( message=f"Unable to fetch reports for workspace {workspace.name}" @@ -312,14 +352,14 @@ def _get_workspace_datasets(self, workspace: Workspace) -> dict: for dataset_dict in datasets: dataset_instance: PowerBIDataset = self._get_resolver().get_dataset( - workspace_id=scan_result[Constant.ID], + workspace=workspace, dataset_id=dataset_dict[Constant.ID], ) # fetch + set dataset parameters try: dataset_parameters = self._get_resolver().get_dataset_parameters( - workspace_id=scan_result[Constant.ID], + workspace_id=workspace.id, dataset_id=dataset_dict[Constant.ID], ) dataset_instance.parameters = dataset_parameters @@ -350,9 +390,11 @@ def _get_workspace_datasets(self, workspace: Workspace) -> dict: ) table = Table( name=table[Constant.NAME], - full_name="{}.{}".format( - dataset_name.replace(" ", "_"), - table[Constant.NAME].replace(" ", "_"), + full_name=form_full_table_name( + config=self.__config, + workspace=workspace, + dataset_name=dataset_name, + table_name=table[Constant.NAME], ), expression=expression, columns=[ @@ -382,7 +424,8 @@ def _get_workspace_datasets(self, workspace: Workspace) -> dict: return dataset_map def _fill_metadata_from_scan_result( - self, workspaces: List[Workspace] + self, + workspaces: List[Workspace], ) -> List[Workspace]: workspace_ids = [workspace.id for workspace in workspaces] scan_result = self._get_scan_result(workspace_ids) @@ -423,7 +466,8 @@ def _fill_metadata_from_scan_result( ) cur_workspace.scan_result = workspace_metadata cur_workspace.datasets = self._get_workspace_datasets(cur_workspace) - + # collect all datasets in the registry + self.dataset_registry.update(cur_workspace.datasets) # Fetch endorsement tag if it is enabled from configuration if self.__config.extract_endorsements_to_tags: 
cur_workspace.dashboard_endorsements = self._get_dashboard_endorsements( @@ -468,6 +512,16 @@ def fill_dashboards() -> None: dashboard.tiles = self._get_resolver().get_tiles( workspace, dashboard=dashboard ) + # set the dataset for tiles + for tile in dashboard.tiles: + if tile.dataset_id: + tile.dataset = self.dataset_registry.get(tile.dataset_id) + if tile.dataset is None: + self.reporter.info( + title="Missing Lineage For Tile", + message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.", + context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}", + ) def fill_reports() -> None: if self.__config.extract_reports is False: @@ -497,6 +551,7 @@ def fill_dashboard_tags() -> None: def fill_workspaces( self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport ) -> Iterable[Workspace]: + workspaces = self._fill_metadata_from_scan_result(workspaces=workspaces) # First try to fill the admin detail as some regular metadata contains lineage to admin metadata for workspace in workspaces: diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json index 66ee60c2eebb3..04dfd3bab3c0b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json @@ -17,6 +17,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", @@ -40,13 +72,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { "json": { - "removed": false + "username": "User2@foo.com" } }, "systemMetadata": { @@ -73,6 +105,77 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", @@ -114,13 +217,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -133,12 +238,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -148,22 +251,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + 
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "chartKey", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,PBI_TEST.TEST.TESTTABLE,PROD)", - "type": "TRANSFORMED" - } - ] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -212,6 +307,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ -228,6 +341,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ -246,6 +377,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { 
+ "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,PBI_TEST.TEST.TESTTABLE,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ -298,70 +474,36 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", - "description": "Library dataset description", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } ] } }, @@ -373,13 +515,13 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", "viewLanguage": "m_query" } }, @@ -391,7 +533,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -400,7 +542,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ 
-412,33 +554,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-test-project.universal.D_WH_DATE,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -454,14 +571,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -471,15 +586,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - 
"viewLanguage": "m_query" + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -490,52 +604,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" - ], - "confidenceScore": 1.0 - } + "typeNames": [ + "Table" ] } }, @@ -546,19 +621,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", - "description": "Library dataset description", - "tags": [] + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -568,13 +639,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -585,13 +658,20 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "upstreamLineage", "aspect": { "json": { - "typeNames": [ - "Table" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-test-project.universal.D_WH_DATE,PROD)", + "type": "TRANSFORMED" + } ] } }, @@ -602,20 +682,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { 
- "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)", - "type": "TRANSFORMED" + "id": "demo-workspace" } ] } @@ -628,13 +703,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", "viewLanguage": "m_query" } }, @@ -646,7 +721,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -655,7 +730,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", + "name": "snowflake native-query-with-join", "description": "Library dataset description", "tags": [] } @@ -667,13 +742,15 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -684,14 +761,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -701,23 +776,67 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)", - "type": "TRANSFORMED" + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": 
"/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } - ] - } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -745,18 +864,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added 
Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -767,12 +882,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -785,12 +906,16 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -803,12 +928,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": 
"UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -819,18 +942,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -841,20 +960,13 @@ }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -866,7 +978,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -881,15 +993,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -924,13 +1034,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", "aspect": { "json": { - "username": "User1@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -940,13 +1052,14 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": 
"UPSERT", - "aspectName": "corpUserKey", + "aspectName": "dashboardKey", "aspect": { "json": { - "username": "User2@foo.com" + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -956,50 +1069,52 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "upstreamLineage", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, + "upstreams": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)", + "type": "TRANSFORMED" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + 
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" + ], + "confidenceScore": 1.0 }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" + ], + "confidenceScore": 1.0 } ] } @@ -1011,13 +1126,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -1027,50 +1151,27 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", 
"changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "ownership", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "chartKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -1080,8 +1181,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1100,37 +1201,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - 
"datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" - } - ] + "removed": false } }, "systemMetadata": { @@ -1140,8 +1217,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1156,14 +1233,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "upstreamLineage", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -1173,15 +1258,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ 
- "PowerBI Tile" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1191,15 +1276,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1209,17 +1294,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1229,14 +1316,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "Table" ] } }, @@ -1247,77 +1334,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", - "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1332,44 +1350,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "UPSERT", - "aspectName": "dashboardKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "datasetProperties", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1379,16 +1372,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - 
"id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, @@ -1400,14 +1391,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "dummy", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -1417,13 +1406,19 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff/details", + "name": "employee_ctc", + "description": "Employee Management", + "tags": [] } }, "systemMetadata": { @@ -1434,12 +1429,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -1470,16 +1474,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", "changeType": 
"UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff/details", - "name": "employee_ctc", - "description": "Employee Management", - "tags": [] + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1489,8 +1489,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cross_workspace_dataset.json b/metadata-ingestion/tests/integration/powerbi/golden_test_cross_workspace_dataset.json new file mode 100644 index 0000000000000..06286fcd5937a --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cross_workspace_dataset.json @@ -0,0 +1,668 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,sales.sales_semantic_model.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "317456E5-1FC7-4BDC-9C84-1185825E293D", + "datasetWebUrl": 
"http://localhost/groups/A8D655A6-F521-477E-8C22-255018583BF4/datasets/317456E5-1FC7-4BDC-9C84-1185825E293D/details" + }, + "title": "Sale Order Tile", + "description": "Sale Order Tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,sales.sales_semantic_model.public_issue_history,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,sales.sales_semantic_model.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,sales.sales_semantic_model.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "317456E5-1FC7-4BDC-9C84-1185825E293D" + }, + "externalUrl": "http://localhost/groups/A8D655A6-F521-477E-8C22-255018583BF4/datasets/317456E5-1FC7-4BDC-9C84-1185825E293D/details", + "name": "public issue_history", + "description": "sales semantic model", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,sales.sales_semantic_model.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Tile" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "charts.885D1762-1655-46BA-AFE3-74C6EC403A9E" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,global_workspace.base_records.core_sales_set,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,global_workspace.base_records.core_sales_set,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,global_workspace.base_records.core_sales_set,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "FE362B98-956E-4394-BA37-6367EE6435E9" + }, + "externalUrl": "http://localhost/groups/C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492/datasets/FE362B98-956E-4394-BA37-6367EE6435E9/details", + "name": "core_sales_set", + "description": "base_records", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,global_workspace.base_records.core_sales_set,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Sales" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + 
"aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "FE362B98-956E-4394-BA37-6367EE6435E9", + "datasetWebUrl": "http://localhost/groups/C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492/datasets/FE362B98-956E-4394-BA37-6367EE6435E9/details" + }, + "title": "Yearly Sales", + "description": "Yearly Sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,global_workspace.base_records.core_sales_set,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Tile" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "charts.945C2C2A-4588-45DE-8385-F24F5E39A57C" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Sales" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "0F0ADA0E-E38A-44F6-B667-90E93A96F5A1" + }, + "title": "Not Present In Current Ingestion", + "description": "Not Present In Current Ingestion", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A1C7204F-4D04-4E5E-B886-B30EA2C64CB3)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + 
"typeNames": [ + "PowerBI Tile" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A1C7204F-4D04-4E5E-B886-B30EA2C64CB3)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A1C7204F-4D04-4E5E-B886-B30EA2C64CB3)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "3" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "Sales" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "A8D655A6-F521-477E-8C22-255018583BF4" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)", + "value": "urn:li:chart:(powerbi,charts.885D1762-1655-46BA-AFE3-74C6EC403A9E)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)", + "value": "urn:li:chart:(powerbi,charts.945C2C2A-4588-45DE-8385-F24F5E39A57C)" + }, + { + "op": "add", + "path": 
"/charts/urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "value": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B847EBDA-BC48-4F92-8E16-6B46D900E7BB)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Sales" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A1C7204F-4D04-4E5E-B886-B30EA2C64CB3)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": 
"powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/A1C7204F-4D04-4E5E-B886-B30EA2C64CB3" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A1C7204F-4D04-4E5E-B886-B30EA2C64CB3)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Sales" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 665f5d5a3bb41..099f75a190ca2 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -17,6 +17,40 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", @@ -40,8 +74,8 @@ } }, { - 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -74,15 +108,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -92,19 +126,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", - "description": "Library dataset description", - "tags": [] + "path": [ + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -117,10 +149,12 @@ "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -130,15 +164,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -149,14 +182,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), 
MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -167,7 +198,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -176,7 +207,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -191,10 +222,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": 
"UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -204,14 +237,36 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": 
"chartInfo", "aspect": { "json": { - "typeNames": [ - "Table" + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } ] } }, @@ -223,14 +278,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -241,7 +294,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -250,7 +303,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -263,12 +316,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -279,7 +334,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -296,15 +351,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where 
startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -314,19 +369,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -337,12 +386,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -352,14 
+403,51 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartInfo", "aspect": { "json": { - "typeNames": [ - "Table" + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } ] } }, @@ -371,14 +459,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": 
"status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -389,7 +475,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -398,7 +484,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -410,13 +496,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -427,7 +515,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -444,15 +532,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "path": [ + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -462,19 +552,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartKey", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", - "description": "Library dataset description", - "tags": [] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -485,12 +570,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join 
GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -500,14 +587,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -519,14 +606,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -537,17 +622,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", + "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", "tags": [] } }, @@ -559,12 +644,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -577,12 +664,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + 
"materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -595,12 +682,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -632,14 +717,67 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": 
"dashboardInfo", "aspect": { - "json": { - "removed": false - } + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -666,52 +804,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, 
- "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } - ] + "removed": false } }, "systemMetadata": { @@ -721,13 +820,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -737,14 +842,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", 
"aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -755,14 +860,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -772,15 +878,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "status", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "removed": false } }, "systemMetadata": { @@ -790,17 +894,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dashboardKey", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -810,35 +911,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - 
"createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, + "path": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + "id": "demo-workspace" } ] } @@ -850,30 +931,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -883,14 +953,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -901,15 +971,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -919,17 +989,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -939,15 +1007,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "status", "aspect": { 
"json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "removed": false } }, "systemMetadata": { @@ -957,77 +1023,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", - "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1042,14 +1039,19 @@ } }, { - "entityType": "dashboard", - 
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1059,16 +1061,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json index 26476e61a0bd7..3c02b146a900c 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json @@ -40,13 +40,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "tag", + "entityUrn": "urn:li:tag:Promoted", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "tagKey", "aspect": { "json": { - "removed": false + "name": "Promoted" } }, 
"systemMetadata": { @@ -59,12 +59,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -74,17 +72,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "corpUserKey", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } - ] + "username": "User2@foo.com" } }, "systemMetadata": { @@ -95,14 +89,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -112,19 +106,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": 
"status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -134,13 +122,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -151,13 +141,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "globalTags", "aspect": { "json": { - "typeNames": [ - "Table" + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } ] } }, @@ -168,17 +160,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "status", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } - ] + "removed": false } }, "systemMetadata": { @@ -189,13 +177,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = 
TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", "viewLanguage": "m_query" } }, @@ -207,7 +195,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -216,7 +204,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -228,13 +216,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -245,14 +235,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -262,15 +250,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "globalTags", + 
"aspectName": "browsePathsV2", "aspect": { "json": { - "tags": [ + "path": [ { - "tag": "urn:li:tag:Promoted" + "id": "demo-workspace" } ] } @@ -283,13 +271,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = 
Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", "viewLanguage": "m_query" } }, @@ -301,7 +289,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -310,7 +298,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -323,12 +311,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -339,14 +329,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": 
"subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -356,17 +344,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "chartKey", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -377,14 +362,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -394,19 +379,37 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": 
"05169CD2-E713-41E6-9600-1D8066D95445" + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", - "description": "Library dataset description", - "tags": [] + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] } }, "systemMetadata": { @@ -417,12 +420,16 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "globalTags", "aspect": { "json": { - "removed": false + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } + ] } }, "systemMetadata": { @@ -433,13 +440,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "globalTags", "aspect": { "json": { - "typeNames": [ - "Table" + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } ] } }, @@ -450,17 +459,13 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "corpUserKey", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } - ] + "username": "User1@foo.com" } }, "systemMetadata": { @@ -471,13 +476,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", "viewLanguage": "m_query" } }, @@ -489,7 +494,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -498,7 +503,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -510,8 +515,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -527,14 +532,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -544,16 +547,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "browsePaths", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -565,14 +566,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n 
public_order_date", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -582,19 +583,52 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", - "description": "Library dataset description", - "tags": [] + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] } }, "systemMetadata": { @@ -604,8 +638,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -621,13 +655,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "globalTags", "aspect": { "json": { - "typeNames": [ - "Table" + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } ] } }, @@ -638,16 +674,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "subTypes", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Promoted" - } + "typeNames": [ + "PowerBI Tile" ] } }, @@ -659,13 +693,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLogic": "let\n Source = 
Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", "viewLanguage": "m_query" } }, @@ -677,17 +711,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", "tags": [] } }, @@ -698,13 +732,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -715,14 +753,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -750,19 +786,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartKey", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -791,12 +822,16 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -806,13 +841,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": 
"viewProperties", "aspect": { "json": { - "username": "User1@foo.com" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -822,13 +859,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", "aspect": { "json": { - "username": "User2@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -838,51 +877,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } + "typeNames": [ + "Table" ] } }, @@ -893,8 +895,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -909,15 +911,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -927,14 +933,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "browsePaths", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "paths": [ + 
"/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -944,14 +951,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "Table" ] } }, @@ -962,18 +969,67 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "path": [ - { - "id": "demo-workspace" + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } - ] - } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -982,35 +1038,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "globalTags", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, + "tags": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + "tag": "urn:li:tag:Promoted" } ] } @@ -1022,8 +1058,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1038,14 +1074,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "dashboardKey", "aspect": { "json": { 
"dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -1055,14 +1091,16 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" + "path": [ + { + "id": "demo-workspace" + } ] } }, @@ -1073,15 +1111,27 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "ownership", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -1091,15 +1141,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "globalTags", "aspect": { "json": { - "path": [ + "tags": [ { - "id": "demo-workspace" + "tag": "urn:li:tag:Promoted" } ] } @@ -1111,15 +1161,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", 
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1129,67 +1179,16 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 
0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -1198,13 +1197,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1214,14 +1219,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1231,27 +1237,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "status", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "removed": false } }, "systemMetadata": { @@ -1261,17 +1253,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1281,13 +1275,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1297,8 +1293,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1313,13 +1309,17 
@@ } }, { - "entityType": "tag", - "entityUrn": "urn:li:tag:Promoted", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "tagKey", + "aspectName": "globalTags", "aspect": { "json": { - "name": "Promoted" + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } + ] } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 83f8f881835b7..8290d996742f8 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -18,19 +18,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "corpUserKey", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "username": "User2@foo.com" } }, "systemMetadata": { @@ -59,42 +53,6 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", - 
"changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", - "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { @@ -102,7 +60,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", + "name": "public issue_history", "description": "Library dataset description", "tags": [] } @@ -114,8 +72,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -131,7 +89,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -148,15 +106,15 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -166,19 +124,13 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -189,12 +141,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -204,14 +158,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -223,14 +177,12 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -241,7 +193,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -250,7 +202,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -263,12 +215,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", 
"aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -278,14 +232,16 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Table" + "path": 
[ + { + "id": "demo-workspace" + } ] } }, @@ -297,14 +253,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -315,7 +269,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -324,7 +278,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -337,12 +291,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ 
+ "Table" + ] } }, "systemMetadata": { @@ -353,7 +309,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -370,15 +326,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -388,19 +343,37 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", - "description": "Library dataset 
description", - "tags": [] + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] } }, "systemMetadata": { @@ -411,12 +384,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -426,15 +401,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "corpUserKey", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "username": "User1@foo.com" } }, "systemMetadata": { @@ -445,14 +418,12 @@ }, { "entityType": 
"dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -463,7 +434,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -472,7 +443,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -484,8 +455,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -501,7 +472,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -518,53 +489,52 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] } }, "systemMetadata": { @@ -574,14 +544,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -593,13 +563,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = 
Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", "viewLanguage": "m_query" } }, @@ -610,19 +580,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -633,7 +597,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -649,14 +613,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -666,13 +634,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "viewProperties", "aspect": { "json": { - "username": "User1@foo.com" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse 
if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -682,13 +652,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "viewProperties", "aspect": { "json": { - "username": "User2@foo.com" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -698,52 +670,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - 
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } - ] + "removed": false } }, "systemMetadata": { @@ -753,13 +686,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -787,31 +726,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "chartKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "Table" ] } }, @@ -822,17 +744,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "removed": false } }, "systemMetadata": { @@ -842,53 +760,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } 
-}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -898,14 +782,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -915,14 +800,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -934,14 +819,13 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "chartKey", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -952,7 +836,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + 
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1091,27 +975,71 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "viewProperties", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + 
"json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1141,13 +1069,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1157,8 +1087,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1171,5 +1101,75 @@ "runId": "powerbi-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": 
"Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json index 93a2c533d21ca..cc20051f9525b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json @@ -18,19 +18,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "corpUserKey", "aspect": { "json": { - "customProperties": { - "datasetId": 
"05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "username": "User2@foo.com" } }, "systemMetadata": { @@ -59,42 +53,6 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", - "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { @@ -102,7 +60,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", + "name": 
"public issue_history", "description": "Library dataset description", "tags": [] } @@ -114,8 +72,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -131,7 +89,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -148,15 +106,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink 
Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -166,19 +124,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -189,12 +141,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n 
PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -204,14 +158,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -223,14 +177,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -241,7 +193,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", 
"changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -250,7 +202,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -263,12 +215,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = 
Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -278,14 +232,16 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Table" + "path": [ + { + "id": "demo-workspace" + } ] } }, @@ -297,14 +253,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -315,7 +269,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": 
"datasetProperties", "aspect": { @@ -324,7 +278,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -337,12 +291,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -353,7 +309,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -370,15 +326,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -388,19 +343,37 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", - "description": "Library dataset description", - "tags": [] + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] } }, "systemMetadata": { @@ -411,12 +384,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = 
universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -426,15 +401,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "corpUserKey", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "username": "User1@foo.com" } }, "systemMetadata": { @@ -445,14 +418,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -463,7 +434,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -472,7 +443,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": 
"postgres_test_table", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -484,8 +455,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -501,7 +472,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -518,37 +489,70 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartInfo", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" }, - "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -559,7 +563,25 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + 
"viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -575,14 +597,34 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -611,18 +653,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", 
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -651,11 +689,33 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -666,13 +726,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", 
"aspect": { "json": { - "username": "User1@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -682,13 +744,13 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "status", "aspect": { "json": { - "username": "User2@foo.com" + "removed": false } }, "systemMetadata": { @@ -698,52 +760,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - 
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } - ] + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -753,13 +782,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -769,14 +800,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -807,11 +838,13 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "browsePathsV2", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "path": [ + { + "id": "demo-workspace" + } ] } }, @@ -822,16 +855,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "browsePaths", "aspect": { "json": { - "path": 
[ - { - "id": "demo-workspace" - } + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -842,19 +873,25 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "dashboardInfo", "aspect": { "json": { "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + "chartCount": "2", + "workspaceName": "demo-workspace", + "workspaceId": "64ED5CAD-7C10-4684-8180-826122881108" }, - "title": "yearly_sales", - "description": "yearly_sales", + "title": "test_dashboard", + "description": "Description of test dashboard", + "charts": [ + "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + ], + "datasets": [], + "dashboards": [], "lastModified": { "created": { "time": 0, @@ -865,14 +902,7 @@ "actor": "urn:li:corpuser:unknown" } }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" - } - ] + "dashboardUrl": "https://localhost/dashboards/web/1" } }, "systemMetadata": { @@ -882,8 +912,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -898,15 +928,14 @@ } }, { - "entityType": "chart", - "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dashboardKey", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -916,14 +945,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "viewProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -933,15 +963,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -951,17 +981,13 @@ } }, { - "entityType": "chart", - "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "removed": false } }, "systemMetadata": { @@ -971,15 +997,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "datasetProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -992,32 +1022,14 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "dashboardInfo", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "chartCount": "2", - "workspaceName": "demo-workspace", - "workspaceId": "64ED5CAD-7C10-4684-8180-826122881108" - }, - "title": "test_dashboard", - "description": "Description of test dashboard", - "charts": [ - "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - ], - "datasets": [], - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + "path": 
[ + { + "id": "demo-workspace" } - }, - "dashboardUrl": "https://localhost/dashboards/web/1" + ] } }, "systemMetadata": { @@ -1027,8 +1039,26 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1043,14 +1073,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1090,16 +1125,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": 
"browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, @@ -1108,37 +1141,5 @@ "runId": "powerbi-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index eda831722cc91..af3f20260af23 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -17,6 +17,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User2@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": 
"no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", @@ -40,8 +72,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -73,6 +105,40 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", @@ -114,13 +180,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -133,12 +201,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": 
"UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -148,20 +214,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.PBI_TEST.TEST.TESTTABLE,PROD)", - "type": "TRANSFORMED" + "id": "demo-workspace" } ] } @@ -212,6 +273,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ -228,6 +307,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ 
-246,9 +342,49 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -259,7 +395,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.PBI_TEST.TEST.TESTTABLE,PROD)", "type": "TRANSFORMED" } ] @@ -273,14 +409,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": 
"UPSERT", - "aspectName": "viewProperties", + "aspectName": "upstreamLineage", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -290,19 +433,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "corpUserKey", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", - "description": "Library dataset description", - "tags": [] + "username": "User1@foo.com" } }, "systemMetadata": { @@ -315,10 +452,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { 
- "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -328,15 +467,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -347,14 +484,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -365,7 +500,7 @@ }, { "entityType": 
"dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -374,7 +509,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -386,13 +521,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -403,7 +540,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -420,28 +557,66 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_ANALYST,PROD)", - "type": "TRANSFORMED" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" }, { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_FORECAST,PROD)", - "type": "TRANSFORMED" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" } ] } @@ -454,13 +629,13 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", "viewLanguage": "m_query" } }, @@ -472,7 +647,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -481,7 +656,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", + "name": "snowflake native-query-with-join", "description": "Library dataset description", "tags": [] } @@ -492,9 +667,27 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Tile" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test", + 
"lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -509,14 +702,16 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Table" + "path": [ + { + "id": "demo-workspace" + } ] } }, @@ -528,20 +723,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,high_performance_production_unit.salesdb.HR.EMPLOYEES,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -553,13 +741,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLogic": "let\n Source = 
Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", "viewLanguage": "m_query" } }, @@ -571,18 +759,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", - "description": "Library dataset description", - "tags": [] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -593,12 +777,18 @@ }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -609,14 +799,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -627,21 +821,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,operational_instance.mics.public.order_date,PROD)", - "type": "TRANSFORMED" - 
} - ] + "removed": false } }, "systemMetadata": { @@ -651,15 +836,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -670,18 +854,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -707,14 +887,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -726,7 +906,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -737,7 +917,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", "type": "TRANSFORMED" } ] @@ -751,36 +931,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -791,43 +949,9 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "upstreamLineage", "aspect": { "json": { "upstreams": [ @@ -836,7 +960,15 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_ANALYST,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_FORECAST,PROD)", "type": "TRANSFORMED" } ] @@ -849,30 +981,67 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", - "changeType": "UPSERT", - "aspectName": "corpUserKey", - "aspect": { - "json": { - "username": "User1@foo.com" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", - "changeType": "UPSERT", - "aspectName": "corpUserKey", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "username": "User2@foo.com" - } + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -881,50 +1050,20 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "upstreamLineage", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, + "upstreams": [ { - "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" } ] } @@ -936,8 +1075,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -952,50 +1091,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "PowerBI Tile" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "dashboardKey", "aspect": { "json": { "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -1005,8 +1108,8 @@ } }, { - "entityType": "chart", - 
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1025,70 +1128,27 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "ownership", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ + "owners": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", 
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "chartKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -1098,14 +1158,21 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "upstreamLineage", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,high_performance_production_unit.salesdb.HR.EMPLOYEES,PROD)", + "type": "TRANSFORMED" + } ] } }, @@ -1116,15 +1183,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1134,17 +1201,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", 
"changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1154,15 +1223,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1172,77 +1241,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", - "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": 
"/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-lineage-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1257,14 +1257,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1274,27 +1279,15 @@ } }, { - 
"entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "subTypes", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1304,16 +1297,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, @@ -1324,8 +1315,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1340,13 +1331,22 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,operational_instance.mics.public.order_date,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index 6f502cdfc0f5b..f4cd657b860e0 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -33,6 +33,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", @@ -56,13 +72,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "viewProperties", "aspect": { "json": { - "username": "User2@foo.com" + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": 
"m_query" } }, "systemMetadata": { @@ -72,15 +90,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "corpUserKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "username": "User2@foo.com" } }, "systemMetadata": { @@ -91,7 +107,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -127,30 +143,16 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": 
"subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -161,7 +163,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -178,15 +180,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -213,13 +213,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLogic": "let\n Source = 
Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", "viewLanguage": "m_query" } }, @@ -229,16 +229,69 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": 
"test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) 
*#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -249,7 +302,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -258,7 +311,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -271,13 +324,31 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -305,7 +376,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -314,7 +385,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "big-query-with-parameter", 
"description": "Library dataset description", "tags": [] } @@ -327,14 +398,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -345,12 +416,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -361,7 +434,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -399,7 +472,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -419,12 +492,12 @@ 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -434,14 +507,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -452,13 +525,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "chartKey", "aspect": { "json": { - "removed": false + "dashboardTool": "powerbi", + "chartId": "myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -471,16 +545,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": 
"05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", - "description": "Library dataset description", - "tags": [] + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -490,15 +560,21 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" + }, + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -509,69 +585,12 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "chartInfo", - "aspect": { - "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)" - }, - { - "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)" - } - ] + "removed": false } }, "systemMetadata": { @@ -582,7 +601,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -591,7 +610,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", + "name": "snowflake native-query-with-join", "description": "Library dataset description", "tags": [] } @@ -606,24 +625,6 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "PowerBI Tile" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", - "changeType": "UPSERT", "aspectName": "status", "aspect": { "json": { @@ -637,14 +638,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "Table" ] } }, @@ -655,15 +656,27 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "ownership", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -673,14 +686,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "dashboardKey", "aspect": { "json": { "dashboardTool": "powerbi", - "chartId": "myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -691,18 +704,34 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"chartInfo", "aspect": { "json": { - "path": [ + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)" }, { - "id": "demo-workspace" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)" } ] } @@ -715,7 +744,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -730,14 +759,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -748,35 +777,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", - "changeType": "UPSERT", - "aspectName": 
"status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -787,13 +796,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each 
Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", "viewLanguage": "m_query" } }, @@ -804,15 +813,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "dashboardTool": "powerbi", + "chartId": "myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -859,16 +867,85 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": 
"/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then 
[TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -878,27 +955,21 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "browsePathsV2", "aspect": { "json": { - "owners": [ + "path": [ { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" }, { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" + "id": "demo-workspace" } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + ] } }, "systemMetadata": { @@ -908,14 +979,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "browsePaths", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -925,35 +997,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + 
"entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ + "path": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)" + "id": "demo-workspace" } ] } @@ -965,8 +1021,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -981,15 +1037,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "datasetProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + 
"customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -999,14 +1059,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -1017,14 +1077,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "viewProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1034,67 +1095,16 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", "aspect": { - "json": [ - { - 
"op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -1103,21 +1113,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" - }, - { - "id": "demo-workspace" - } - ] + "removed": false } }, "systemMetadata": { @@ -1127,15 +1129,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "datasetProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -1145,21 +1151,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)" - }, - { - "id": "demo-workspace" - } - ] + "removed": false } }, "systemMetadata": { @@ -1169,13 +1167,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json index 4393a87d1f570..e134d795af9ef 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json @@ -86,15 +86,30 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartInfo", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [] } }, "systemMetadata": { @@ -104,30 +119,13 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "status", "aspect": { "json": { - "schemaName": "dbo_book_issue", - "platform": "urn:li:dataPlatform:powerbi", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [] + "removed": false } }, "systemMetadata": { @@ -137,19 +135,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartKey", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "", - "tags": [] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -159,13 +152,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -175,14 +170,33 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "container", "aspect": { "json": { - "typeNames": [ - "Table" + "container": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", + "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + } ] } }, @@ -196,10 +210,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -209,15 +225,38 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "chartInfo", "aspect": { "json": { - "tags": [ + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "tag": "urn:li:tag:Certified" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)" } ] } @@ -232,19 +271,16 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", - "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" - }, - { - "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", - "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" - } - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -255,14 +291,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select 
*,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -273,12 +307,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "ms_sql_native_table", + "schemaName": "dbo_book_issue", "platform": "urn:li:dataPlatform:powerbi", "version": 0, "created": { @@ -305,19 +339,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -353,12 +381,16 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "globalTags", "aspect": { "json": { - "removed": false + "tags": [ + { + "tag": "urn:li:tag:Certified" + } + ] } }, "systemMetadata": { @@ -368,14 +400,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -389,10 +421,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV 
Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -403,15 +437,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "subTypes", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:Certified" - } + "typeNames": [ + "Table" ] } }, @@ -425,19 +457,16 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", - "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" - }, - { - "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", - "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" - } - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -448,14 +477,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"database.sql.net\", \"analytics\", [Query=\"select * from analytics.sales_revenue\"])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -466,12 +493,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "revenue", + "schemaName": "ms_sql_native_table", "platform": "urn:li:dataPlatform:powerbi", "version": 0, "created": { @@ -488,59 +515,7 @@ "rawSchema": "" } }, - "fields": [ - { - "fieldPath": "op_item_id", - "nullable": false, - "description": "op_item_id column description", - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "op_id", - "nullable": false, - "description": "op_id description", - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "op_product_name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "String", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "event_name", - "nullable": false, - "description": "let\n x column description", - "type": { - "type": { - "com.linkedin.schema.NullType": {} - } - }, - "nativeDataType": "measure", - "recursive": false, - "isPartOfKey": false - } - ] + "fields": [] } }, "systemMetadata": { @@ -550,19 +525,14 @@ } }, { - "entityType": "dataset", 
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartKey", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "revenue", - "description": "", - "tags": [] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -598,29 +568,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "globalTags", "aspect": { "json": { - "typeNames": [ - "Table" + "tags": [ + { + "tag": "urn:li:tag:Certified" + } ] } }, @@ -632,7 +588,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -650,14 +606,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "viewProperties", "aspect": { 
"json": { - "tags": [ - { - "tag": "urn:li:tag:Certified" - } - ] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"database.sql.net\", \"analytics\", [Query=\"select * from analytics.sales_revenue\"])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -668,20 +622,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", - "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" - }, - { - "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", - "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" - } + "typeNames": [ + "Table" ] } }, @@ -692,30 +639,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" }, - "inputs": [] + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "revenue", 
+ "description": "", + "tags": [] } }, "systemMetadata": { @@ -725,8 +661,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -741,14 +677,82 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "schemaMetadata", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "schemaName": "revenue", + "platform": "urn:li:dataPlatform:powerbi", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "op_item_id", + "nullable": false, + "description": "op_item_id column description", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "op_id", + "nullable": false, + "description": "op_id description", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "op_product_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "event_name", + "nullable": false, + "description": "let\n x column description", + "type": { + "type": { 
+ "com.linkedin.schema.NullType": {} + } + }, + "nativeDataType": "measure", + "recursive": false, + "isPartOfKey": false + } + ] } }, "systemMetadata": { @@ -759,7 +763,7 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { @@ -776,13 +780,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "upstreamLineage", "aspect": { "json": { - "container": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -795,17 +808,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "globalTags", "aspect": { "json": { - "upstreams": [ + "tags": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", - "type": "TRANSFORMED" + "tag": "urn:li:tag:Certified" } ] } @@ -817,18 +825,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", - "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" - } - ] + "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" } }, "systemMetadata": { @@ -838,39 +841,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)" - } + "typeNames": [ + "Table" ] } }, @@ -881,13 +859,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", + "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + }, + { + "id": 
"urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" + } + ] } }, "systemMetadata": { @@ -897,15 +884,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "container", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" } }, "systemMetadata": { @@ -915,14 +900,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "browsePathsV2", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "path": [ + { + "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", + "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + }, + { + "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" + } + ] } }, "systemMetadata": { @@ -932,14 +925,21 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "browsePathsV2", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "path": [ + { + "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3", + "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3" + }, + { + "id": 
"urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc" + } ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json index 6da5f5781112e..e75557982da58 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json @@ -17,6 +17,40 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.public_issue_history,DEV)", @@ -40,14 +74,67 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.public_issue_history,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "removed": false - } + 
"json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -74,15 +161,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = 
PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -92,19 +177,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dashboardKey", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", - "description": "Library dataset description", - "tags": [] + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -117,10 +197,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -130,14 +212,20 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Table" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, + { + "id": "demo-workspace" + } ] } }, @@ -149,14 +237,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", 
each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -167,7 +253,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -176,7 +262,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -189,12 +275,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = 
universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -207,12 +295,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 
361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -225,12 +313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -262,13 +348,27 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.big-query-with-parameter,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "ownership", "aspect": { "json": { - "removed": false + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } }, "systemMetadata": { @@ -297,14 +397,12 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -315,7 +413,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -324,7 +422,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -337,12 +435,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + 
"viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -353,7 +453,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -373,12 +473,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -411,12 +509,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -444,15 +544,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": 
"corpUserKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "username": "User2@foo.com" } }, "systemMetadata": { @@ -462,19 +560,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -484,13 +576,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { "json": { - "removed": false + "username": "User1@foo.com" } }, "systemMetadata": { @@ -500,15 +592,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -518,15 +608,15 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -536,19 +626,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -559,12 +643,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name 
from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -574,14 +660,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "PowerBI Tile" ] } }, @@ -593,14 +679,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 
10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -611,17 +695,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", "tags": [] } }, @@ -632,13 +716,21 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -649,7 +741,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -666,29 +758,14 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", - "changeType": "UPSERT", - "aspectName": "corpUserKey", - "aspect": { - "json": { - "username": "User1@foo.com" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "chartKey", "aspect": { "json": { - "username": "User2@foo.com" + "dashboardTool": "powerbi", + "chartId": "aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -753,13 +830,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -770,13 +849,13 @@ }, { "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": 
"subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -787,32 +866,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "chartKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "chartId": "aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "status", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "removed": false } }, "systemMetadata": { @@ -822,21 +882,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" - }, - { - "id": "demo-workspace" - } - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": 
"Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -849,34 +907,10 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)" - } - ] + "removed": false } }, "systemMetadata": { @@ -886,13 +920,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -905,11 +941,12 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -922,11 +959,17 @@ 
"entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, + { + "id": "demo-workspace" + } ] } }, @@ -940,12 +983,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "chartKey", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardTool": "powerbi", + "chartId": "aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -958,16 +1000,32 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "chartInfo", "aspect": { "json": { - "path": [ + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)" }, { - "id": "demo-workspace" + "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)" } ] } @@ -979,15 +1037,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "datasetProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -997,67 +1059,16 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": 
"urn:li:chart:(powerbi,aws-ap-south-1.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,aws-ap-south-1.charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] + "json": { + "typeNames": [ + "Table" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -1066,13 +1077,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1082,14 +1095,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "status", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "removed": false } }, "systemMetadata": { @@ -1099,27 +1111,19 @@ } }, { 
- "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "datasetProperties", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -1129,20 +1133,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,aws-ap-south-1.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" - }, - { - "id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, @@ -1153,13 +1151,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", 
"aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1169,8 +1169,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,aws-ap-south-1.hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json index b8963a0d7782d..87e89e5ed74f2 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json @@ -39,6 +39,40 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1645599600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1645599600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", @@ -107,40 +141,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1645599600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1645599600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.articles,PROD)", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index e327ca695beb7..bc0feef4a127d 100644 --- 
a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -17,6 +17,77 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", @@ -40,8 +111,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -74,15 +145,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", - "viewLanguage": "m_query" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -92,19 +163,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "chartKey", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - 
}, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", - "description": "Library dataset description", - "tags": [] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -117,10 +183,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -130,14 +198,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -149,14 +217,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = 
Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -167,7 +233,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -176,7 +242,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -191,10 +257,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added 
Custom2\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -204,14 +272,16 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Table" + "path": [ + { + "id": "demo-workspace" + } ] } }, @@ -223,14 +293,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -241,7 +309,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -250,7 +318,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -263,12 +331,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -279,7 +349,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -296,15 +366,37 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartInfo", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", - "viewLanguage": "m_query" + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] } }, "systemMetadata": { @@ -314,19 +406,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -337,12 +423,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = 
#\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -352,15 +440,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -371,14 +458,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -389,7 +474,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -398,7 +483,7 @@ 
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -410,13 +495,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -427,7 +514,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -444,15 +531,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "browsePaths", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", - "viewLanguage": "m_query" + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -462,19 +549,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", - "description": "Library dataset description", - "tags": [] + "path": [ + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -485,12 +570,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -500,14 +587,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -519,14 +606,12 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -537,17 +622,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", "tags": [] } }, @@ -559,12 +644,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "viewProperties", "aspect": { "json": { - "removed": false + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select 
*,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -577,12 +664,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -595,12 +682,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "status", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as 
CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "removed": false } }, "systemMetadata": { @@ -632,14 +717,67 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "removed": false - } + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": 
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -666,52 +804,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" - } - ] + "removed": false } }, "systemMetadata": { @@ -721,13 +820,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -737,14 +842,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Tile" + "Table" ] } }, @@ -755,14 +860,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -772,15 
+878,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "status", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "removed": false } }, "systemMetadata": { @@ -790,126 +894,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "demo-workspace" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "chartInfo", - "aspect": { - "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ 
- "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "dashboardKey", "aspect": { "json": { "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "PowerBI Tile" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -919,8 +911,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -940,13 +932,13 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityUrn": 
"urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { "json": { "paths": [ - "/powerbi/demo-workspace" + "/powerbi/second-demo-workspace" ] } }, @@ -958,7 +950,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "PATCH", "aspectName": "dashboardInfo", "aspect": { @@ -966,37 +958,22 @@ { "op": "add", "path": "/customProperties/chartCount", - "value": "2" + "value": "0" }, { "op": "add", "path": "/customProperties/workspaceName", - "value": "demo-workspace" + "value": "second-demo-workspace" }, { "op": "add", "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" + "value": "64ED5CAD-7C22-4684-8180-826122881108" }, { "op": "add", "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": "/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + "value": "test_dashboard2" }, { "op": "add", @@ -1027,7 +1004,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1042,14 +1019,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "viewProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1059,17 +1037,15 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1079,15 +1055,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "status", "aspect": { "json": { - "paths": [ - "/powerbi/second-demo-workspace" - ] + "removed": false } }, "systemMetadata": { @@ -1097,52 +1071,20 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - 
"aspectName": "dashboardInfo", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "0" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "second-demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C22-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard2" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -1154,6 +1096,41 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": 
{ + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", "aspectName": "status", "aspect": { "json": { @@ -1167,14 +1144,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "datasetProperties", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1202,5 +1184,23 @@ "runId": "powerbi-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json index 90c8ee5d0379e..db5af68508f10 100644 --- 
a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json @@ -17,6 +17,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User2@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", @@ -40,8 +72,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -73,6 +105,40 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + 
"removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", @@ -114,13 +180,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -133,12 +201,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -148,20 +214,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_production_instance.pbi_test.test.testtable,PROD)", - "type": "TRANSFORMED" + "id": "demo-workspace" } ] } @@ -214,23 +275,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - 
"systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -250,19 +295,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD)", - "type": "TRANSFORMED" - } - ] + "removed": false } }, "systemMetadata": { @@ -272,15 +308,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "chartKey", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", - "viewLanguage": "m_query" + 
"dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -291,18 +326,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", - "description": "Library dataset description", - "tags": [] + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -312,13 +343,37 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "chartInfo", "aspect": { "json": { - "removed": false + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] } }, "systemMetadata": { @@ -329,13 +384,20 @@ 
}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "upstreamLineage", "aspect": { "json": { - "typeNames": [ - "Table" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_production_instance.pbi_test.test.testtable,PROD)", + "type": "TRANSFORMED" + } ] } }, @@ -347,7 +409,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -358,7 +420,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-computing-dev-account.my-test-project.universal.d_wh_date,QA)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD)", "type": "TRANSFORMED" } ] @@ -370,15 +432,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": 
"viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", "viewLanguage": "m_query" } }, @@ -390,7 +468,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -399,7 +477,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query-with-join", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -411,8 +489,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", 
"changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -428,14 +506,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -445,29 +521,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePaths", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)", - "type": "TRANSFORMED" - } + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -479,14 +540,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", - 
"viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -496,19 +557,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "job-history", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -518,13 +573,52 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "chartInfo", "aspect": { "json": { - "removed": false + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + 
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] } }, "systemMetadata": { @@ -535,13 +629,20 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "upstreamLineage", "aspect": { "json": { - "typeNames": [ - "Table" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-computing-dev-account.my-test-project.universal.d_wh_date,QA)", + "type": "TRANSFORMED" + } ] } }, @@ -552,21 +653,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oracle-sales-instance.salesdb.hr.employees,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "PowerBI Tile" ] } }, @@ -578,13 +672,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", "viewLanguage": "m_query" } }, @@ -596,7 +690,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -605,7 +699,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "postgres_test_table", + "name": "snowflake native-query-with-join", "description": "Library dataset description", "tags": [] } @@ -617,13 +711,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "demo-workspace" + } + ] } }, "systemMetadata": { @@ -634,14 +732,12 @@ }, { "entityType": 
"dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "removed": false } }, "systemMetadata": { @@ -651,22 +747,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "chartKey", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)", - "type": "TRANSFORMED" - } - ] + "dashboardTool": "powerbi", + "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" } }, "systemMetadata": { @@ -695,18 +783,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "viewProperties", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as 
CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -717,12 +801,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "datasetProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -735,12 +825,16 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + 
}, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -751,21 +845,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)", - "type": "TRANSFORMED" - } - ] + "removed": false } }, "systemMetadata": { @@ -776,14 +861,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then 
[TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", - "viewLanguage": "m_query" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -796,16 +881,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "ms_sql_native_table", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -816,7 +897,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -831,14 +912,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "Table" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -874,13 +955,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", "aspect": { "json": { - "username": "User1@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -890,14 +973,67 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", - "changeType": "UPSERT", - "aspectName": "corpUserKey", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", "aspect": { - "json": { - "username": "User2@foo.com" - } + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "2" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "demo-workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "64ED5CAD-7C10-4684-8180-826122881108" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, @@ -906,50 +1042,28 @@ } }, { - "entityType": "chart", - "entityUrn": 
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "upstreamLineage", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" - }, - "title": "test_tile", - "description": "test_tile", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" - }, - { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" - }, + "upstreams": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)", + "type": "TRANSFORMED" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)", + "type": "TRANSFORMED" } ] } @@ -961,30 
+1075,21 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "upstreamLineage", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" + } ] } }, @@ -995,14 +1100,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "status", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + "removed": false } }, "systemMetadata": { @@ -1012,15 +1116,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "dashboardKey", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" } }, 
"systemMetadata": { @@ -1030,8 +1133,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1050,53 +1153,27 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "chartInfo", + "aspectName": "ownership", "aspect": { "json": { - "customProperties": { - "createdFrom": "Dataset", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" - }, - "title": "yearly_sales", - "description": "yearly_sales", - "lastModified": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - }, - "inputs": [ + "owners": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } } 
}, "systemMetadata": { @@ -1106,14 +1183,22 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "upstreamLineage", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,oracle-sales-instance.salesdb.hr.employees,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -1123,15 +1208,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "viewProperties", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1141,15 +1226,15 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "viewProperties", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" - ] + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = 
Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1159,17 +1244,19 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "datasetProperties", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } - ] + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1179,14 +1266,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "Table" ] } }, @@ -1197,77 +1284,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "PATCH", - "aspectName": "dashboardInfo", - "aspect": { - "json": [ - { - "op": "add", - "path": "/customProperties/chartCount", - "value": "2" - }, - { - "op": "add", - "path": "/customProperties/workspaceName", - "value": "demo-workspace" - }, - { - "op": "add", - "path": "/customProperties/workspaceId", - "value": "64ED5CAD-7C10-4684-8180-826122881108" - }, - { - "op": "add", - "path": "/title", - "value": "test_dashboard" - }, - { - "op": "add", - "path": 
"/description", - "value": "Description of test dashboard" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", - "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" - }, - { - "op": "add", - "path": "/dashboardUrl", - "value": "https://localhost/dashboards/web/1" - }, - { - "op": "add", - "path": "/lastModified", - "value": { - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - } - ] - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1282,44 +1300,19 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "changeType": "UPSERT", - "aspectName": "dashboardKey", - "aspect": { - "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "ownership", + 
"aspectName": "datasetProperties", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:users.User1@foo.com", - "type": "NONE" - }, - { - "owner": "urn:li:corpuser:users.User2@foo.com", - "type": "NONE" - } - ], - "ownerTypes": {}, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -1329,16 +1322,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "demo-workspace" - } + "typeNames": [ + "Table" ] } }, @@ -1349,8 +1340,8 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1365,13 +1356,22 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)", + "type": "TRANSFORMED" + } + ] } 
}, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/mock_data/cross_workspace_mock_response.json b/metadata-ingestion/tests/integration/powerbi/mock_data/cross_workspace_mock_response.json new file mode 100644 index 0000000000000..9b6931727bc1a --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/mock_data/cross_workspace_mock_response.json @@ -0,0 +1,220 @@ +{ + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "A8D655A6-F521-477E-8C22-255018583BF4", + "isReadOnly": true, + "name": "Sales", + "type": "Workspace", + "state": "Active" + }, + { + "id": "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492", + "isReadOnly": true, + "name": "Global Workspace", + "type": "Workspace", + "state": "Active" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/A8D655A6-F521-477E-8C22-255018583BF4/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "A1C7204F-4D04-4E5E-B886-B30EA2C64CB3", + "isReadOnly": true, + "displayName": "test_dashboard", + "description": "Description of test dashboard", + "embedUrl": "https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/A8D655A6-F521-477E-8C22-255018583BF4/dashboards/A1C7204F-4D04-4E5E-B886-B30EA2C64CB3/tiles": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "885D1762-1655-46BA-AFE3-74C6EC403A9E", + "title": "Sale Order Tile", + "embedUrl": "https://localhost/tiles/embed/1", + "datasetId": 
"317456E5-1FC7-4BDC-9C84-1185825E293D" + }, + { + "id": "945C2C2A-4588-45DE-8385-F24F5E39A57C", + "title": "Yearly Sales", + "embedUrl": "https://localhost/tiles/embed/2", + "datasetId": "FE362B98-956E-4394-BA37-6367EE6435E9" + }, + { + "id": "B847EBDA-BC48-4F92-8E16-6B46D900E7BB", + "title": "Not Present In Current Ingestion", + "embedUrl": "https://localhost/tiles/embed/2", + "datasetId": "0F0ADA0E-E38A-44F6-B667-90E93A96F5A1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/62DAF926-0B18-4FF1-982C-2A3EB6B8F0E4": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "A8D655A6-F521-477E-8C22-255018583BF4", + "name": "Sales", + "type": "Workspace", + "state": "Active", + "datasets": [ + { + "id": "317456E5-1FC7-4BDC-9C84-1185825E293D", + "endorsementDetails": { + "endorsement": "Promoted" + }, + "name": "sales semantic model", + "tables": [ + { + "name": "public issue_history", + "source": [ + { + "expression": "dummy" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + } + ] + } + ] + }, + { + "id": "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492", + "name": "Global Workspace", + "type": "Workspace", + "state": "Active", + "datasets": [ + { + "id": "FE362B98-956E-4394-BA37-6367EE6435E9", + "endorsementDetails": { + "endorsement": "Promoted" + }, + "name": "base_records", + "tables": [ + { + "name": "core_sales_set", + "source": [ + { + "expression": "dummy" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + } + ] + } + ] + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/62DAF926-0B18-4FF1-982C-2A3EB6B8F0E4": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED" + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/81B02907-E2A3-45C3-B505-3781839C8CAA": { + "method": "GET", + "status_code": 200, + "json": { + 
"workspaces": [ + { + "id": "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492", + "name": "Global Workspace", + "type": "Workspace", + "state": "Active", + "datasets": [ + { + "id": "FE362B98-956E-4394-BA37-6367EE6435E9", + "endorsementDetails": { + "endorsement": "Promoted" + }, + "name": "base_records", + "tables": [ + { + "name": "core_sales_set", + "source": [ + { + "expression": "dummy" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + } + ] + } + ] + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/81B02907-E2A3-45C3-B505-3781839C8CAA": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/A8D655A6-F521-477E-8C22-255018583BF4/datasets/317456E5-1FC7-4BDC-9C84-1185825E293D": { + "method": "GET", + "status_code": 200, + "json": { + "id": "317456E5-1FC7-4BDC-9C84-1185825E293D", + "name": "sales semantic model", + "description": "sales semantic model", + "webUrl": "http://localhost/groups/A8D655A6-F521-477E-8C22-255018583BF4/datasets/317456E5-1FC7-4BDC-9C84-1185825E293D" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492/datasets/FE362B98-956E-4394-BA37-6367EE6435E9": { + "method": "GET", + "status_code": 200, + "json": { + "id": "FE362B98-956E-4394-BA37-6367EE6435E9", + "name": "base_records", + "description": "base_records", + "webUrl": "http://localhost/groups/C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492/datasets/FE362B98-956E-4394-BA37-6367EE6435E9" + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/mock_data/default_mock_response.json b/metadata-ingestion/tests/integration/powerbi/mock_data/default_mock_response.json new file mode 100644 index 0000000000000..28972fbded9e8 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/mock_data/default_mock_response.json @@ -0,0 +1,558 @@ +{ + 
"https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "64ED5CAD-7C10-4684-8180-826122881108", + "isReadOnly": true, + "name": "demo-workspace", + "type": "Workspace", + "state": "Active" + + }, + { + "id": "64ED5CAD-7C22-4684-8180-826122881108", + "isReadOnly": true, + "name": "second-demo-workspace", + "type": "Workspace", + "state": "Active" + + }, + { + "id": "64ED5CAD-7322-4684-8180-826122881108", + "isReadOnly": true, + "name": "Workspace 2", + "type": "Workspace", + "state": "Active" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": true, + "displayName": "test_dashboard", + "description": "Description of test dashboard", + "embedUrl": "https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": true, + "displayName": "test_dashboard2", + "embedUrl": "https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/reports/5b218778-e7a5-4d73-8187-f10824047715/users": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "identifier": "User1@foo.com", + "displayName": "user1", + "emailAddress": "User1@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", + "principalType": "User" + }, + { + "identifier": 
"User2@foo.com", + "displayName": "user2", + "emailAddress": "User2@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", + "principalType": "User" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/users": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "identifier": "User1@foo.com", + "displayName": "user1", + "emailAddress": "User1@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", + "principalType": "User" + }, + { + "identifier": "User2@foo.com", + "displayName": "user2", + "emailAddress": "User2@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", + "principalType": "User" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/users": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "identifier": "User3@foo.com", + "displayName": "user3", + "emailAddress": "User3@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", + "principalType": "User" + }, + { + "identifier": "User4@foo.com", + "displayName": "user4", + "emailAddress": "User4@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", + "principalType": "User" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0", + "title": "test_tile", + "embedUrl": "https://localhost/tiles/embed/1", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + { + "id": "23212598-23b5-4980-87cc-5fc0ecd84385", + "title": "yearly_sales", + "embedUrl": "https://localhost/tiles/embed/2", + "datasetId": 
"ba0130a1-5b03-40de-9535-b34e778ea6ed" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445": { + "method": "GET", + "status_code": 200, + "json": { + "id": "05169CD2-E713-41E6-9600-1D8066D95445", + "name": "library-dataset", + "description": "Library dataset description", + "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445": { + "method": "GET", + "status_code": 200, + "json": { + "id": "05169CD2-E713-41E6-96AA-1D8066D95445", + "name": "library-dataset", + "description": "Library dataset description", + "webUrl": "http://localhost/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": { + "method": "GET", + "status_code": 200, + "json": { + "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "name": "hr_pbi_test", + "description": "hr pbi test description", + "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + "datasourceType": "PostgreSql", + "connectionDetails": { + "database": "library_db", + "server": "foo" + } + } + ] + } + }, + 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445/datasources": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + "datasourceType": "PostgreSql", + "connectionDetails": { + "database": "library_db", + "server": "foo" + } + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4674efd1-603c-4129-8d82-03cf2be05aff": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED" + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/a674efd1-603c-4129-8d82-03cf2be05aff": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED" + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "64ED5CAD-7C10-4684-8180-826122881108", + "name": "demo-workspace", + "type": "Workspace", + "state": "Active", + "datasets": [ + { + "id": "05169CD2-E713-41E6-9600-1D8066D95445", + "endorsementDetails": { + "endorsement": "Promoted" + }, + "name": "test_sf_pbi_test", + "tables": [ + { + "name": "public issue_history", + "source": [ + { + "expression": "dummy" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "SNOWFLAKE_TESTTABLE", + "source": [ + { + "expression": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": 
"DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "snowflake native-query", + "source": [ + { + "expression": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "big-query-with-parameter", + "source": [ + { + "expression": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = 
#\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "snowflake native-query-with-join", + "source": [ + { + "expression": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "job-history", + "source": [ + { + "expression": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "postgres_test_table", + "source": [ + { + "expression": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + } + ] + }, + { + "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "name": "hr_pbi_test", + "tables": [ + { + "name": "dbo_book_issue", + "source": [ + { + "expression": "let\n Source = 
Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + }, + { + "name": "ms_sql_native_table", + "source": [ + { + "expression": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3" + } + ] + } + ] + }, + { + "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", + "tables": [ + { + "name": "employee_ctc", + "source": [ + { + "expression": "dummy" + } + ] + } + ] + } + ], + "dashboards": [ + { + "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": true + } + ], + "reports": [ + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "reportType": "PaginatedReport", + "id": "5b218778-e7a5-4d73-8187-f10824047715", + "name": "SalesMarketing", + "description": "Acryl sales marketing report" + } + ] + } + ] + } + }, + 
"https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/a674efd1-603c-4129-8d82-03cf2be05aff": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "64ED5CAD-7C22-4684-8180-826122881108", + "name": "second-demo-workspace", + "type": "Workspace", + "state": "Active", + "datasets": [ + { + "id": "05169CD2-E713-41E6-96AA-1D8066D95445", + "tables": [ + { + "name": "public articles", + "source": [ + { + "expression": "dummy" + } + ] + } + ] + } + ], + "dashboards": [ + { + "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": true + } + ], + "reports": [ + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PaginatedReport", + "name": "SalesMarketing", + "description": "Acryl sales marketing report" + } + ] + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", + "name": "SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", + "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48" + }, + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "Printable SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", + "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48" 
+ } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": { + "method": "GET", + "status_code": 200, + "json": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PaginatedReport", + "name": "SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", + "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "displayName": "Regional Sales Analysis", + "name": "ReportSection", + "order": "0" + }, + { + "displayName": "Geographic Analysis", + "name": "ReportSection1", + "order": "1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "name": "Parameter - Source", + "type": "Text", + "isRequired": true, + "currentValue": "my-test-project" + }, + { + "name": "My bq project", + "type": "Text", + "isRequired": true, + "currentValue": "gcp_billing" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff": { + "method": "GET", + "status_code": 200, + "json": { + "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", + "name": "employee-dataset", + "description": "Employee Management", + "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff" + } + }, + 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163/pages": { + "method": "GET", + "status_code": 400, + "text": "{\"error\":{\"code\":\"InvalidRequest\",\"message\":\"Request is currently not supported for RDL reports\"}}" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_type_filter.json b/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_type_filter.json new file mode 100644 index 0000000000000..9a6ea36e7ab46 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_type_filter.json @@ -0,0 +1,76 @@ +{ + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "isReadOnly": true, + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + "state": "Active" + }, + { + "id": "C6B5DBBC-7580-406C-A6BE-72628C28801C", + "isReadOnly": true, + "name": "Sales", + "type": "Workspace", + "state": "Active" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + "state": "Active", + "datasets": [] + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": true, + "displayName": "test_dashboard", + "description": "Description of test dashboard", + "embedUrl": 
"https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1" + } + ] + } + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED" + } + }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { + "method": "GET", + "status_code": 200, + "json": { + "value": [] + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 43f77b059e41f..78cf103107477 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1,14 +1,17 @@ import datetime +import json import logging import re import sys -from typing import Any, Dict, List, Optional, cast +from pathlib import Path +from typing import Any, Dict, List, Optional, Union, cast from unittest import mock from unittest.mock import MagicMock import pytest from freezegun import freeze_time +from datahub.ingestion.api.source import StructuredLogLevel from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.powerbi.config import ( Constant, @@ -71,597 +74,42 @@ def scan_init_response(request, context): "64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": { "id": "a674efd1-603c-4129-8d82-03cf2be05aff" }, - "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C": { - "id": "4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3" + "A8D655A6-F521-477E-8C22-255018583BF4": { + "id": "62DAF926-0B18-4FF1-982C-2A3EB6B8F0E4" + }, + "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492": { + "id": "81B02907-E2A3-45C3-B505-3781839C8CAA", }, } return w_id_vs_response[workspace_id] -def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None: - override_data = override_data 
or {} +def read_mock_data(path: Union[Path, str]) -> dict: + with open(path) as p: + return json.load(p) + + +def register_mock_api( + pytestconfig: pytest.Config, request_mock: Any, override_data: Optional[dict] = None +) -> None: + + default_mock_data_path = ( + pytestconfig.rootpath + / "tests/integration/powerbi/mock_data/default_mock_response.json" + ) + api_vs_response = { - "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "64ED5CAD-7C10-4684-8180-826122881108", - "isReadOnly": True, - "name": "demo-workspace", - "type": "Workspace", - }, - { - "id": "64ED5CAD-7C22-4684-8180-826122881108", - "isReadOnly": True, - "name": "second-demo-workspace", - "type": "Workspace", - }, - { - "id": "64ED5CAD-7322-4684-8180-826122881108", - "isReadOnly": True, - "name": "Workspace 2", - "type": "Workspace", - }, - ], - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { - "method": "GET", - "status_code": 200, - "json": { - "value": [], - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", - "isReadOnly": True, - "displayName": "test_dashboard", - "description": "Description of test dashboard", - "embedUrl": "https://localhost/dashboards/embed/1", - "webUrl": "https://localhost/dashboards/web/1", - } - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE", - "isReadOnly": True, - "displayName": "test_dashboard2", - "embedUrl": "https://localhost/dashboards/embed/1", - "webUrl": "https://localhost/dashboards/web/1", - } - ] - }, - }, - 
"https://api.powerbi.com/v1.0/myorg/admin/reports/5b218778-e7a5-4d73-8187-f10824047715/users": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "identifier": "User1@foo.com", - "displayName": "user1", - "emailAddress": "User1@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", - "principalType": "User", - }, - { - "identifier": "User2@foo.com", - "displayName": "user2", - "emailAddress": "User2@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", - "principalType": "User", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/users": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "identifier": "User1@foo.com", - "displayName": "user1", - "emailAddress": "User1@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", - "principalType": "User", - }, - { - "identifier": "User2@foo.com", - "displayName": "user2", - "emailAddress": "User2@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", - "principalType": "User", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/users": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "identifier": "User3@foo.com", - "displayName": "user3", - "emailAddress": "User3@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", - "principalType": "User", - }, - { - "identifier": "User4@foo.com", - "displayName": "user4", - "emailAddress": "User4@foo.com", - "datasetUserAccessRight": "ReadWrite", - "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", - "principalType": "User", - }, - ] - }, - }, - 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0", - "title": "test_tile", - "embedUrl": "https://localhost/tiles/embed/1", - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - }, - { - "id": "23212598-23b5-4980-87cc-5fc0ecd84385", - "title": "yearly_sales", - "embedUrl": "https://localhost/tiles/embed/2", - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { - "method": "GET", - "status_code": 200, - "json": {"value": []}, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": { - "method": "GET", - "status_code": 200, - "json": {"value": []}, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445": { - "method": "GET", - "status_code": 200, - "json": { - "id": "05169CD2-E713-41E6-9600-1D8066D95445", - "name": "library-dataset", - "description": "Library dataset description", - "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445", - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445": { - "method": "GET", - "status_code": 200, - "json": { - "id": "05169CD2-E713-41E6-96AA-1D8066D95445", - "name": "library-dataset", - "description": "Library dataset description", - "webUrl": "http://localhost/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445", - }, - }, - 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": { - "method": "GET", - "status_code": 200, - "json": { - "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "name": "hr_pbi_test", - "description": "hr pbi test description", - "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed", - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - "datasourceType": "PostgreSql", - "connectionDetails": { - "database": "library_db", - "server": "foo", - }, - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445/datasources": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "datasourceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - "datasourceType": "PostgreSql", - "connectionDetails": { - "database": "library_db", - "server": "foo", - }, - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4674efd1-603c-4129-8d82-03cf2be05aff": { - "method": "GET", - "status_code": 200, - "json": { - "status": "SUCCEEDED", - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/a674efd1-603c-4129-8d82-03cf2be05aff": { - "method": "GET", - "status_code": 200, - "json": { - "status": "SUCCEEDED", - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": { - "method": "GET", - "status_code": 200, - "json": { - "workspaces": [ - { - "id": "64ED5CAD-7C10-4684-8180-826122881108", - "name": "demo-workspace", - "state": "Active", - "type": "Workspace", - "datasets": [ - { - "id": "05169CD2-E713-41E6-9600-1D8066D95445", - 
"endorsementDetails": {"endorsement": "Promoted"}, - "name": "test_sf_pbi_test", - "tables": [ - { - "name": "public issue_history", - "source": [ - { - "expression": "dummy", - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "SNOWFLAKE_TESTTABLE", - "source": [ - { - "expression": 'let\n Source = Snowflake.Databases("hp123rt5.ap-southeast-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "snowflake native-query", - "source": [ - { - "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen 
[UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "big-query-with-parameter", - "source": [ - { - "expression": 'let\n Source = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\nuniversal_Schema = #"gcp-project"{[Name="universal",Kind="Schema"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name="D_WH_DATE",Kind="Table"]}[Data],\n#"Filtered Rows" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#"Filtered Rows1"', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "snowflake native-query-with-join", - "source": [ - { - "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]){[Name="GSL_TEST_DB"]}[Data], "select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, \'mo\')", null, [EnableFolding=true])\nin\n Source', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "job-history", - "source": [ - { - "expression": 'let\n Source = Oracle.Database("localhost:1521/salesdb.domain.com", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', - } 
- ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "postgres_test_table", - "source": [ - { - "expression": 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - ], - }, - { - "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - "name": "hr_pbi_test", - "tables": [ - { - "name": "dbo_book_issue", - "source": [ - { - "expression": 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue', - } - ], - "datasourceUsages": [ - { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - { - "name": "ms_sql_native_table", - "source": [ - { - "expression": 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"', - } - ], - "datasourceUsages": [ 
- { - "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", - } - ], - }, - ], - }, - { - "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", - "tables": [ - { - "name": "employee_ctc", - "source": [ - { - "expression": "dummy", - } - ], - } - ], - }, - ], - "dashboards": [ - { - "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", - "isReadOnly": True, - } - ], - "reports": [ - { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "5b218778-e7a5-4d73-8187-f10824047715", - "reportType": "PaginatedReport", - "name": "SalesMarketing", - "description": "Acryl sales marketing report", - } - ], - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/a674efd1-603c-4129-8d82-03cf2be05aff": { - "method": "GET", - "status_code": 200, - "json": { - "workspaces": [ - { - "id": "64ED5CAD-7C22-4684-8180-826122881108", - "name": "second-demo-workspace", - "type": "Workspace", - "state": "Active", - "datasets": [ - { - "id": "05169CD2-E713-41E6-96AA-1D8066D95445", - "tables": [ - { - "name": "public articles", - "source": [ - { - "expression": "dummy", - } - ], - } - ], - } - ], - "dashboards": [ - { - "id": "7D668CAD-8FFC-4505-9215-655BCA5BEBAE", - "isReadOnly": True, - } - ], - "reports": [ - { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "5b218778-e7a5-4d73-8187-f10824047715", - "reportType": "PowerBIReport", - "name": "SalesMarketing", - "description": "Acryl sales marketing report", - }, - { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "584cf13a-1485-41c2-a514-b1bb66fff163", - "reportType": "PaginatedReport", - "name": "SalesMarketing", - "description": "Acryl sales marketing report", - }, - ], - }, - ] - }, - }, "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": { "method": "POST", "status_code": 200, "json": scan_init_response, }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - 
{ - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "5b218778-e7a5-4d73-8187-f10824047715", - "reportType": "PowerBIReport", - "name": "SalesMarketing", - "description": "Acryl sales marketing report", - "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", - "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - }, - { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "584cf13a-1485-41c2-a514-b1bb66fff163", - "reportType": "PaginatedReport", - "name": "Printable SalesMarketing", - "description": "Acryl sales marketing report", - "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", - "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715": { - "method": "GET", - "status_code": 200, - "json": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "5b218778-e7a5-4d73-8187-f10824047715", - "reportType": "PowerBIReport", - "name": "SalesMarketing", - "description": "Acryl sales marketing report", - "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", - "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163": { - "method": "GET", - "status_code": 200, - "json": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", - "id": "584cf13a-1485-41c2-a514-b1bb66fff163", - "reportType": 
"PaginatedReport", - "name": "Printable SalesMarketing", - "description": "Acryl sales marketing report", - "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", - "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "displayName": "Regional Sales Analysis", - "name": "ReportSection", - "order": "0", - }, - { - "displayName": "Geographic Analysis", - "name": "ReportSection1", - "order": "1", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163/pages": { - "method": "GET", - "status_code": 400, # Pages API is not supported for PaginatedReport - "text": '{"error":{"code":"InvalidRequest","message":"Request is currently not supported for RDL reports"}}', - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "name": "Parameter - Source", - "type": "Text", - "isRequired": True, - "currentValue": "my-test-project", - }, - { - "name": "My bq project", - "type": "Text", - "isRequired": True, - "currentValue": "gcp_billing", - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff": { - "method": "GET", - "status_code": 200, - "json": { - "id": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", - "name": "employee-dataset", - "description": "Employee Management", - "webUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff", - }, - }, } - api_vs_response.update(override_data) + api_vs_response.update(read_mock_data(default_mock_data_path)) + + api_vs_response.update(override_data or {}) for url in api_vs_response.keys(): request_mock.register_uri( @@ -708,7 +156,7 @@ def test_powerbi_ingest( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -755,75 +203,11 @@ def test_powerbi_workspace_type_filter( register_mock_api( request_mock=requests_mock, - override_data={ - "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", - "isReadOnly": True, - "name": "Jane Smith Workspace", - "type": "PersonalGroup", - "state": "Active", - }, - { - "id": "C6B5DBBC-7580-406C-A6BE-72628C28801C", - "isReadOnly": True, - "name": "Sales", - "type": "Workspace", - "state": "Active", - }, - ], - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { - "method": "GET", - "status_code": 200, - "json": { - "value": [], - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { - "method": "GET", - "status_code": 200, - "json": { - "workspaces": [ - { - "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", - "name": "Jane Smith Workspace", - "type": "PersonalGroup", - "state": "Active", - "datasets": [], - }, - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", - "isReadOnly": True, - "displayName": "test_dashboard", - "description": "Description of 
test dashboard", - "embedUrl": "https://localhost/dashboards/embed/1", - "webUrl": "https://localhost/dashboards/web/1", - } - ] - }, - }, - "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { - "method": "GET", - "status_code": 200, - "json": { - "status": "SUCCEEDED", - }, - }, - }, + pytestconfig=pytestconfig, + override_data=read_mock_data( + pytestconfig.rootpath + / "tests/integration/powerbi/mock_data/workspace_type_filter.json" + ), ) default_config: dict = default_source_config() @@ -878,7 +262,7 @@ def test_powerbi_ingest_patch_disabled( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -945,7 +329,7 @@ def test_powerbi_platform_instance_ingest( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) output_path: str = f"{tmp_path}/powerbi_platform_instance_mces.json" @@ -991,7 +375,7 @@ def test_powerbi_ingest_urn_lower_case( ) -> None: test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1038,7 +422,7 @@ def test_override_ownership( ) -> None: test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1083,7 +467,7 @@ def test_scan_all_workspaces( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) 
pipeline = Pipeline.create( { @@ -1135,7 +519,7 @@ def test_extract_reports( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1181,7 +565,7 @@ def test_extract_lineage( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1233,7 +617,7 @@ def test_extract_endorsements( ) -> None: test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1281,6 +665,7 @@ def test_admin_access_is_not_allowed( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( + pytestconfig=pytestconfig, request_mock=requests_mock, override_data={ "https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo": { @@ -1342,7 +727,7 @@ def test_workspace_container( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1389,7 +774,7 @@ def test_access_token_expiry_with_long_expiry( ) -> None: enable_logging() - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1431,7 +816,7 @@ def test_access_token_expiry_with_short_expiry( ) -> None: enable_logging() - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) pipeline = Pipeline.create( { @@ -1486,7 +871,7 @@ def test_dataset_type_mapping_should_set_to_all( 
""" Here we don't need to run the pipeline. We need to verify dataset_type_mapping is set to default dataplatform """ - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) new_config: dict = {**default_source_config()} @@ -1523,7 +908,7 @@ def test_dataset_type_mapping_error( Here we don't need to run the pipeline. We need to verify if both dataset_type_mapping and server_to_platform_instance are set then value error should get raised """ - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"): Pipeline.create( @@ -1578,7 +963,7 @@ def test_server_to_platform_map( "localhost:1521": {"platform_instance": "oracle-sales-instance", "env": "PROD"}, } - register_mock_api(request_mock=requests_mock) + register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) output_path: str = f"{tmp_path}/powerbi_server_to_platform_instance_mces.json" @@ -1684,6 +1069,7 @@ def validate_pipeline(pipeline: Pipeline) -> None: ], users=[], tags=[], + dataset_id=report[Constant.DATASET_ID], dataset=mock_workspace.datasets.get(report[Constant.DATASET_ID]), ) for report in mock_reports @@ -1711,6 +1097,7 @@ def test_reports_with_failed_page_request( Test that all reports are fetched even if a single page request fails """ register_mock_api( + pytestconfig=pytestconfig, request_mock=requests_mock, override_data={ "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports": { @@ -1832,6 +1219,7 @@ def test_independent_datasets_extraction( test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( + pytestconfig=pytestconfig, request_mock=requests_mock, override_data={ "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { @@ -1935,6 +1323,7 @@ def test_cll_extraction( test_resources_dir = 
pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( + pytestconfig=pytestconfig, request_mock=requests_mock, ) @@ -1989,6 +1378,7 @@ def test_cll_extraction_flags( ) -> None: register_mock_api( + pytestconfig=pytestconfig, request_mock=requests_mock, ) @@ -2017,3 +1407,86 @@ def test_cll_extraction_flags( }, } ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +@pytest.mark.integration +def test_powerbi_cross_workspace_reference_info_message( + mock_msal: MagicMock, + pytestconfig: pytest.Config, + tmp_path: str, + mock_time: datetime.datetime, + requests_mock: Any, +) -> None: + enable_logging() + + register_mock_api( + pytestconfig=pytestconfig, + request_mock=requests_mock, + override_data=read_mock_data( + path=pytestconfig.rootpath + / "tests/integration/powerbi/mock_data/cross_workspace_mock_response.json" + ), + ) + + config = default_source_config() + + del config["workspace_id"] + + config["workspace_id_pattern"] = { + "allow": [ + "A8D655A6-F521-477E-8C22-255018583BF4", + "C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492", + ] + } + + config["include_workspace_name_in_dataset_urn"] = True + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **config, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + + assert isinstance(pipeline.source, PowerBiDashboardSource) # to silent the lint + + info_entries: dict = pipeline.source.reporter._structured_logs._entries.get( + StructuredLogLevel.INFO, {} + ) # type :ignore + + is_entry_present: bool = False + # Printing INFO entries + for key, entry in info_entries.items(): + if entry.title == "Missing Lineage For Tile": + is_entry_present = True + break + + assert ( + is_entry_present + ), 'Info message "Missing Lineage For Tile" should be present in reporter' + + 
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + golden_file = "golden_test_cross_workspace_dataset.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/powerbi_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) From 4be66458de67cca608377bc426b449fee8e32453 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:45:03 -0500 Subject: [PATCH 36/50] docs(custom-plugins): add overview image (#11634) --- metadata-models-custom/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md index 1d26251bc13c9..83917530a41d0 100644 --- a/metadata-models-custom/README.md +++ b/metadata-models-custom/README.md @@ -204,6 +204,19 @@ that were loaded for debugging purposes. } ``` +#### Custom Plugin Ecosystem Overview + +The following diagram shows the overall picture of the various validators, mutators, and side effects shown within +the context of typical read/write operations within DataHub. Each component is discussed in further detail in the +sections below. + +

+ +

+ +In the diagram above, the circles represent Aspects (custom aspects or standard). As the Aspects progress they can be mutated/changed, +rejected, or additional aspects can be generated by side effects. + #### Custom Validators Custom aspects might require that instances of those aspects adhere to specific conditions or rules. These conditions could vary wildly depending on the use case however they could be as simple From 5db78c6d8affde40bbaf7a6d569b3bced793aba3 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:02:58 -0500 Subject: [PATCH 37/50] fix(ci): fix build and test workflow (#11644) --- .github/workflows/build-and-test.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 52148ef1b91f9..7d6df18795219 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -67,17 +67,16 @@ jobs: timezoneLinux: ${{ matrix.timezone }} - name: Check out the repo uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: pip - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - if: ${{ needs.setup.outputs.ingestion_change == 'true' }} - with: - python-version: "3.10" - cache: pip - name: Gradle build (and test) for NOT metadata ingestion if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} run: | From d34717fd82eea5592b25cf3840fcea0a7a7acc04 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 16 Oct 2024 13:50:33 -0700 Subject: [PATCH 38/50] fix(ingest): remove default value from DatahubClientConfig.server (#11570) --- docs/how/updating-datahub.md | 4 +- .../client/dagster_generator.py | 7 - .../sensors/datahub_sensors.py | 16 +- 
.../api/entities/dataproduct/dataproduct.py | 2 +- .../src/datahub/ingestion/graph/config.py | 2 +- ...atahub_ingestion_checkpointing_provider.py | 16 +- metadata-ingestion/tests/conftest.py | 5 +- .../test_business_glossary.py | 12 +- .../tests/test_helpers/state_helpers.py | 10 +- .../tests/unit/graph/test_client.py | 2 +- .../state/test_redundant_run_skip_handler.py | 10 +- .../unit/stateful_ingestion/test_configs.py | 39 +-- .../tests/unit/test_glue_source.py | 18 +- .../tests/unit/test_transform_dataset.py | 254 +++++++++--------- smoke-test/pytest.ini | 3 + smoke-test/tests/test_stateful_ingestion.py | 4 - 16 files changed, 204 insertions(+), 200 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 5b4769ed30e3e..8911d282f86bb 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -40,7 +40,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #11484 - Rest API authorization enabled by default - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. - #11619 - schema field/column paths can no longer be empty strings -- #11619 - schema field/column paths can no longer be duplicated within the schema +- #11619 - schema field/column paths can no longer be duplicated within the schema +- #11570 - The `DatahubClientConfig`'s server field no longer defaults to `http://localhost:8080`. Be sure to explicitly set this. +- #11570 - If a `datahub_api` is explicitly passed to a stateful ingestion config provider, it will be used. We previously ignored it if the pipeline context also had a graph object. 
### Potential Downtime diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index df9d0fc423fcf..a2cf159dd12f6 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -78,12 +78,6 @@ class Constant: # Default config constants DEFAULT_DATAHUB_REST_URL = "http://localhost:8080" - # Environment variable contants - DATAHUB_REST_URL = "DATAHUB_REST_URL" - DATAHUB_ENV = "DATAHUB_ENV" - DATAHUB_PLATFORM_INSTANCE = "DATAHUB_PLATFORM_INSTANCE" - DAGSTER_UI_URL = "DAGSTER_UI_URL" - # Datahub inputs/outputs constant DATAHUB_INPUTS = "datahub.inputs" DATAHUB_OUTPUTS = "datahub.outputs" @@ -154,7 +148,6 @@ class DatasetLineage(NamedTuple): class DatahubDagsterSourceConfig(DatasetSourceConfigMixin): datahub_client_config: DatahubClientConfig = pydantic.Field( - default=DatahubClientConfig(), description="Datahub client config", ) diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py index 4633014222d05..ebb2c82d952b1 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py @@ -1,5 +1,6 @@ import os import traceback +import warnings from collections import defaultdict from types import ModuleType from typing import Dict, List, NamedTuple, Optional, Sequence, Set, Tuple, Union @@ -38,7 +39,7 @@ from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData from dagster._core.execution.stats import RunStepKeyStatsSnapshot from datahub.emitter.mcp 
import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import SubTypesClass from datahub.sql_parsing.sqlglot_lineage import ( SqlParsingResult, @@ -47,6 +48,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub_dagster_plugin.client.dagster_generator import ( + Constant, DagsterEnvironment, DagsterGenerator, DatahubDagsterSourceConfig, @@ -182,7 +184,17 @@ def __init__( if config: self.config = config else: - self.config = DatahubDagsterSourceConfig() + # This is a temporary warning for backwards compatibility. Eventually, we'll remove this + # branch and make the config required. + warnings.warn( + "Using the default DataHub client config is deprecated. Pass in a config object explicitly.", + stacklevel=2, + ) + self.config = DatahubDagsterSourceConfig( + datahub_client_config=DatahubClientConfig( + server=Constant.DEFAULT_DATAHUB_REST_URL + ) + ) self.graph = DataHubGraph( self.config.datahub_client_config, ) diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 8f58fa469a7d9..2097922c15136 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -117,7 +117,7 @@ class DataProduct(ConfigModel): @pydantic.validator("assets", each_item=True) def assets_must_be_urns(cls, v: str) -> str: try: - Urn.create_from_string(v) + Urn.from_string(v) except Exception as e: raise ValueError(f"asset {v} is not an urn: {e}") from e diff --git a/metadata-ingestion/src/datahub/ingestion/graph/config.py b/metadata-ingestion/src/datahub/ingestion/graph/config.py index cf0ec45b71458..5f269e14e1a4a 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/config.py +++ 
b/metadata-ingestion/src/datahub/ingestion/graph/config.py @@ -8,7 +8,7 @@ class DatahubClientConfig(ConfigModel): # TODO: Having a default for the server doesn't make a ton of sense. This should be handled # by callers / the CLI, but the actual client should not have any magic. - server: str = "http://localhost:8080" + server: str token: Optional[str] = None timeout_sec: Optional[int] = None retry_status_codes: Optional[List[int]] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py index 442abb3aaf4cf..8f4a53ffc3ed5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py @@ -17,7 +17,7 @@ class DatahubIngestionStateProviderConfig(IngestionCheckpointingProviderConfig): - datahub_api: DatahubClientConfig = DatahubClientConfig() + datahub_api: Optional[DatahubClientConfig] = None class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase): @@ -31,8 +31,8 @@ def __init__( self.graph = graph if not self._is_server_stateful_ingestion_capable(): raise ConfigurationError( - "Datahub server is not capable of supporting stateful ingestion." - " Please consider upgrading to the latest server version to use this feature." + "Datahub server is not capable of supporting stateful ingestion. " + "Please consider upgrading to the latest server version to use this feature." 
) @classmethod @@ -40,11 +40,15 @@ def create( cls, config_dict: Dict[str, Any], ctx: PipelineContext ) -> "DatahubIngestionCheckpointingProvider": config = DatahubIngestionStateProviderConfig.parse_obj(config_dict) - if ctx.graph: - # Use the pipeline-level graph if set + if config.datahub_api is not None: + return cls(DataHubGraph(config.datahub_api)) + elif ctx.graph: + # Use the pipeline-level graph if set. return cls(ctx.graph) else: - return cls(DataHubGraph(config.datahub_api)) + raise ValueError( + "A graph instance is required. Either pass one in the pipeline context, or set it explicitly in the stateful ingestion provider config." + ) def _is_server_stateful_ingestion_capable(self) -> bool: server_config = self.graph.get_config() if self.graph else None diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py index d0716e34ee2b6..db025e7f806c0 100644 --- a/metadata-ingestion/tests/conftest.py +++ b/metadata-ingestion/tests/conftest.py @@ -25,7 +25,10 @@ docker_compose_command, docker_compose_runner, ) -from tests.test_helpers.state_helpers import mock_datahub_graph # noqa: F401,E402 +from tests.test_helpers.state_helpers import ( # noqa: F401,E402 + mock_datahub_graph, + mock_datahub_graph_instance, +) try: # See https://github.com/spulec/freezegun/issues/98#issuecomment-590553475. 
diff --git a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py index b6e1aca4d4fed..73b90df65c04f 100644 --- a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py +++ b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py @@ -3,7 +3,6 @@ import pytest from freezegun import freeze_time -from datahub.ingestion.graph.client import DatahubClientConfig from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.metadata import business_glossary from tests.test_helpers import mce_helpers @@ -41,7 +40,12 @@ def get_default_recipe( @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_glossary_ingest( - mock_datahub_graph, pytestconfig, tmp_path, mock_time, enable_auto_id, golden_file + mock_datahub_graph_instance, + pytestconfig, + tmp_path, + mock_time, + enable_auto_id, + golden_file, ): test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" @@ -55,9 +59,7 @@ def test_glossary_ingest( enable_auto_id=enable_auto_id, ) ) - pipeline.ctx.graph = mock_datahub_graph( - DatahubClientConfig() - ) # Mock to resolve domain + pipeline.ctx.graph = mock_datahub_graph_instance pipeline.run() pipeline.raise_from_status() diff --git a/metadata-ingestion/tests/test_helpers/state_helpers.py b/metadata-ingestion/tests/test_helpers/state_helpers.py index 76f2ab283790f..f68aef742fc73 100644 --- a/metadata-ingestion/tests/test_helpers/state_helpers.py +++ b/metadata-ingestion/tests/test_helpers/state_helpers.py @@ -1,5 +1,5 @@ import types -from typing import Any, Dict, Optional, Type, cast +from typing import Any, Callable, Dict, Optional, Type, cast from unittest.mock import MagicMock, create_autospec import pytest @@ -10,6 +10,7 @@ IngestionCheckpointingProviderBase, ) from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.config 
import DatahubClientConfig from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.state.checkpoint import Checkpoint from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState @@ -101,6 +102,13 @@ def monkey_patch_get_latest_timeseries_value( return mock_datahub_graph_ctx.mock_graph +@pytest.fixture +def mock_datahub_graph_instance( + mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph] +) -> DataHubGraph: + return mock_datahub_graph(DatahubClientConfig(server="http://fake.domain.local")) + + def get_current_checkpoint_from_pipeline( pipeline: Pipeline, ) -> Optional[Checkpoint[GenericCheckpointState]]: diff --git a/metadata-ingestion/tests/unit/graph/test_client.py b/metadata-ingestion/tests/unit/graph/test_client.py index faed1f51b29aa..16795ef8c7f81 100644 --- a/metadata-ingestion/tests/unit/graph/test_client.py +++ b/metadata-ingestion/tests/unit/graph/test_client.py @@ -11,7 +11,7 @@ @patch("datahub.emitter.rest_emitter.DataHubRestEmitter.test_connection") def test_get_aspect(mock_test_connection): mock_test_connection.return_value = {} - graph = DataHubGraph(DatahubClientConfig()) + graph = DataHubGraph(DatahubClientConfig(server="http://fake-domain.local")) user_urn = "urn:li:corpuser:foo" with patch("requests.Session.get") as mock_get: mock_response = Mock() diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py index be6efd3e121ff..85c86f8d205d9 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py @@ -12,17 +12,11 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulStaleMetadataRemovalConfig, ) -from datahub.ingestion.source.state.stateful_ingestion_base import ( - 
DynamicTypedStateProviderConfig, -) from datahub.ingestion.source.state.usage_common_state import ( BaseTimeWindowCheckpointState, ) from datahub.utilities.time import datetime_to_ts_millis -GMS_PORT = 8080 -GMS_SERVER = f"http://localhost:{GMS_PORT}" - @pytest.fixture def stateful_source(mock_datahub_graph: DataHubGraph) -> Iterable[SnowflakeV2Source]: @@ -39,9 +33,7 @@ def stateful_source(mock_datahub_graph: DataHubGraph) -> Iterable[SnowflakeV2Sou password="TST_PWD", stateful_ingestion=StatefulStaleMetadataRemovalConfig( enabled=True, - state_provider=DynamicTypedStateProviderConfig( - type="datahub", config={"datahub_api": {"server": GMS_SERVER}} - ), + # Uses the graph from the pipeline context. ), ) diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/test_configs.py b/metadata-ingestion/tests/unit/stateful_ingestion/test_configs.py index 0e6d60e3440b2..ba40962866f8c 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/test_configs.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/test_configs.py @@ -14,16 +14,12 @@ ) # 0. Common client configs. -datahub_client_configs: Dict[str, Any] = { - "full": { - "server": "http://localhost:8080", - "token": "dummy_test_tok", - "timeout_sec": 10, - "extra_headers": {}, - "max_threads": 10, - }, - "simple": {}, - "default": {}, +datahub_client_full_config = { + "server": "http://localhost:8080", + "token": "dummy_test_tok", + "timeout_sec": 10, + "extra_headers": {}, + "max_threads": 10, } @@ -41,7 +37,7 @@ "checkpointing_valid_full_config": ( DatahubIngestionStateProviderConfig, { - "datahub_api": datahub_client_configs["full"], + "datahub_api": datahub_client_full_config, }, DatahubIngestionStateProviderConfig( # This test verifies that the max_threads arg is ignored. 
@@ -57,27 +53,14 @@ ), False, ), - # Simple config - "checkpointing_valid_simple_config": ( - DatahubIngestionStateProviderConfig, - { - "datahub_api": datahub_client_configs["simple"], - }, - DatahubIngestionStateProviderConfig( - datahub_api=DatahubClientConfig( - server="http://localhost:8080", - ), - ), - False, - ), # Default "checkpointing_default": ( DatahubIngestionStateProviderConfig, { - "datahub_api": datahub_client_configs["default"], + "datahub_api": None, }, DatahubIngestionStateProviderConfig( - datahub_api=DatahubClientConfig(), + datahub_api=None, ), False, ), @@ -102,7 +85,7 @@ "max_checkpoint_state_size": 1024, "state_provider": { "type": "datahub", - "config": datahub_client_configs["full"], + "config": datahub_client_full_config, }, "ignore_old_state": True, "ignore_new_state": True, @@ -114,7 +97,7 @@ ignore_new_state=True, state_provider=DynamicTypedStateProviderConfig( type="datahub", - config=datahub_client_configs["full"], + config=datahub_client_full_config, ), ), False, diff --git a/metadata-ingestion/tests/unit/test_glue_source.py b/metadata-ingestion/tests/unit/test_glue_source.py index 45b9899eacaa7..eb1e7f3fe41d9 100644 --- a/metadata-ingestion/tests/unit/test_glue_source.py +++ b/metadata-ingestion/tests/unit/test_glue_source.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from typing import Any, Callable, Dict, Optional, Tuple, Type, cast +from typing import Any, Dict, Optional, Tuple, Type, cast from unittest.mock import patch import pydantic @@ -11,7 +11,7 @@ import datahub.metadata.schema_classes as models from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.sink.file import write_metadata_file from datahub.ingestion.source.aws.glue import ( GlueProfilingConfig, @@ -74,7 +74,7 @@ def 
glue_source( platform_instance: Optional[str] = None, - mock_datahub_graph: Optional[Callable[[DatahubClientConfig], DataHubGraph]] = None, + mock_datahub_graph_instance: Optional[DataHubGraph] = None, use_s3_bucket_tags: bool = True, use_s3_object_tags: bool = True, extract_delta_schema_from_parameters: bool = False, @@ -83,8 +83,8 @@ def glue_source( extract_transforms: bool = True, ) -> GlueSource: pipeline_context = PipelineContext(run_id="glue-source-tes") - if mock_datahub_graph: - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + if mock_datahub_graph_instance: + pipeline_context.graph = mock_datahub_graph_instance return GlueSource( ctx=pipeline_context, config=GlueSourceConfig( @@ -493,14 +493,14 @@ def test_glue_with_malformed_delta_schema_ingest( def test_glue_ingest_include_table_lineage( tmp_path: Path, pytestconfig: PytestConfig, - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], + mock_datahub_graph_instance: DataHubGraph, platform_instance: str, mce_file: str, mce_golden_file: str, ) -> None: glue_source_instance = glue_source( platform_instance=platform_instance, - mock_datahub_graph=mock_datahub_graph, + mock_datahub_graph_instance=mock_datahub_graph_instance, emit_s3_lineage=True, ) @@ -589,14 +589,14 @@ def test_glue_ingest_include_table_lineage( def test_glue_ingest_include_column_lineage( tmp_path: Path, pytestconfig: PytestConfig, - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], + mock_datahub_graph_instance: DataHubGraph, platform_instance: str, mce_file: str, mce_golden_file: str, ) -> None: glue_source_instance = glue_source( platform_instance=platform_instance, - mock_datahub_graph=mock_datahub_graph, + mock_datahub_graph_instance=mock_datahub_graph_instance, emit_s3_lineage=True, include_column_lineage=True, use_s3_bucket_tags=False, diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 
46c6390b184d3..2e2e85b5d1811 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -1,17 +1,7 @@ import json import re from datetime import datetime, timezone -from typing import ( - Any, - Callable, - Dict, - List, - MutableSequence, - Optional, - Type, - Union, - cast, -) +from typing import Any, Dict, List, MutableSequence, Optional, Type, Union, cast from unittest import mock from uuid import uuid4 @@ -24,7 +14,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api import workunit from datahub.ingestion.api.common import EndOfStream, PipelineContext, RecordEnvelope -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.transformer.add_dataset_browse_path import ( AddDatasetBrowsePathTransformer, @@ -1106,7 +1096,7 @@ def test_pattern_dataset_ownership_with_invalid_type_transformation(mock_time): def test_pattern_container_and_dataset_ownership_transformation( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): def fake_get_aspect( entity_urn: str, @@ -1127,7 +1117,7 @@ def fake_get_aspect( pipeline_context = PipelineContext( run_id="test_pattern_container_and_dataset_ownership_transformation" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore # No owner aspect for the first dataset @@ -1240,7 +1230,7 @@ def fake_get_aspect( def test_pattern_container_and_dataset_ownership_with_no_container( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): def fake_get_aspect( entity_urn: str, @@ -1252,7 +1242,7 @@ def fake_get_aspect( pipeline_context = PipelineContext( 
run_id="test_pattern_container_and_dataset_ownership_with_no_container" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore # No owner aspect for the first dataset @@ -1357,7 +1347,7 @@ def fake_get_aspect( def test_pattern_container_and_dataset_ownership_with_no_match( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): def fake_get_aspect( entity_urn: str, @@ -1375,7 +1365,7 @@ def fake_get_aspect( pipeline_context = PipelineContext( run_id="test_pattern_container_and_dataset_ownership_with_no_match" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore # No owner aspect for the first dataset @@ -1598,10 +1588,10 @@ def run_simple_add_dataset_properties_transformer_semantics( semantics: TransformerSemantics, new_properties: dict, server_properties: dict, - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], + mock_datahub_graph_instance: DataHubGraph, ) -> List[RecordEnvelope]: pipeline_context = PipelineContext(run_id="test_pattern_dataset_schema_terms") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # fake the server response def fake_dataset_properties(entity_urn: str) -> models.DatasetPropertiesClass: @@ -1624,7 +1614,7 @@ def fake_dataset_properties(entity_urn: str) -> models.DatasetPropertiesClass: return output -def test_simple_add_dataset_properties_overwrite(mock_datahub_graph): +def test_simple_add_dataset_properties_overwrite(mock_datahub_graph_instance): new_properties = {"new-simple-property": "new-value"} server_properties = {"p1": "value1"} @@ -1632,7 +1622,7 @@ def test_simple_add_dataset_properties_overwrite(mock_datahub_graph): 
semantics=TransformerSemantics.OVERWRITE, new_properties=new_properties, server_properties=server_properties, - mock_datahub_graph=mock_datahub_graph, + mock_datahub_graph_instance=mock_datahub_graph_instance, ) assert len(output) == 2 @@ -1648,7 +1638,7 @@ def test_simple_add_dataset_properties_overwrite(mock_datahub_graph): } -def test_simple_add_dataset_properties_patch(mock_datahub_graph): +def test_simple_add_dataset_properties_patch(mock_datahub_graph_instance): new_properties = {"new-simple-property": "new-value"} server_properties = {"p1": "value1"} @@ -1656,7 +1646,7 @@ def test_simple_add_dataset_properties_patch(mock_datahub_graph): semantics=TransformerSemantics.PATCH, new_properties=new_properties, server_properties=server_properties, - mock_datahub_graph=mock_datahub_graph, + mock_datahub_graph_instance=mock_datahub_graph_instance, ) assert len(output) == 2 @@ -2334,24 +2324,24 @@ def run_container_transformer_pipeline( return outputs -def test_simple_add_dataset_domain_aspect_name(mock_datahub_graph): +def test_simple_add_dataset_domain_aspect_name(mock_datahub_graph_instance): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance transformer = SimpleAddDatasetDomain.create({"domains": []}, pipeline_context) assert transformer.aspect_name() == models.DomainsClass.ASPECT_NAME -def test_simple_add_dataset_domain(mock_datahub_graph): +def test_simple_add_dataset_domain(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( 
transformer_type=SimpleAddDatasetDomain, @@ -2372,14 +2362,14 @@ def test_simple_add_dataset_domain(mock_datahub_graph): assert acryl_domain in transformed_aspect.domains -def test_simple_add_dataset_domain_mce_support(mock_datahub_graph): +def test_simple_add_dataset_domain_mce_support(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=SimpleAddDatasetDomain, @@ -2403,14 +2393,14 @@ def test_simple_add_dataset_domain_mce_support(mock_datahub_graph): assert acryl_domain in transformed_aspect.domains -def test_simple_add_dataset_domain_replace_existing(mock_datahub_graph): +def test_simple_add_dataset_domain_replace_existing(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=SimpleAddDatasetDomain, @@ -2431,13 +2421,13 @@ def test_simple_add_dataset_domain_replace_existing(mock_datahub_graph): assert acryl_domain in transformed_aspect.domains -def test_simple_add_dataset_domain_semantics_overwrite(mock_datahub_graph): +def test_simple_add_dataset_domain_semantics_overwrite(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") server_domain = builder.make_domain_urn("test.io") pipeline_context = PipelineContext(run_id="transformer_pipe_line") - 
pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_domain(entity_urn: str) -> models.DomainsClass: @@ -2469,14 +2459,14 @@ def fake_get_domain(entity_urn: str) -> models.DomainsClass: def test_simple_add_dataset_domain_semantics_patch( - pytestconfig, tmp_path, mock_time, mock_datahub_graph + pytestconfig, tmp_path, mock_time, mock_datahub_graph_instance ): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") server_domain = builder.make_domain_urn("test.io") pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_domain(entity_urn: str) -> models.DomainsClass: @@ -2508,11 +2498,11 @@ def fake_get_domain(entity_urn: str) -> models.DomainsClass: assert server_domain in transformed_aspect.domains -def test_pattern_add_dataset_domain_aspect_name(mock_datahub_graph): +def test_pattern_add_dataset_domain_aspect_name(mock_datahub_graph_instance): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance transformer = PatternAddDatasetDomain.create( {"domain_pattern": {"rules": {}}}, pipeline_context @@ -2520,7 +2510,7 @@ def test_pattern_add_dataset_domain_aspect_name(mock_datahub_graph): assert transformer.aspect_name() == models.DomainsClass.ASPECT_NAME -def test_pattern_add_dataset_domain_match(mock_datahub_graph): +def test_pattern_add_dataset_domain_match(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pattern = 
"urn:li:dataset:\\(urn:li:dataPlatform:bigquery,.*" @@ -2528,7 +2518,7 @@ def test_pattern_add_dataset_domain_match(mock_datahub_graph): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=PatternAddDatasetDomain, @@ -2551,7 +2541,7 @@ def test_pattern_add_dataset_domain_match(mock_datahub_graph): assert acryl_domain in transformed_aspect.domains -def test_pattern_add_dataset_domain_no_match(mock_datahub_graph): +def test_pattern_add_dataset_domain_no_match(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pattern = "urn:li:dataset:\\(urn:li:dataPlatform:invalid,.*" @@ -2559,7 +2549,7 @@ def test_pattern_add_dataset_domain_no_match(mock_datahub_graph): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=PatternAddDatasetDomain, @@ -2582,7 +2572,7 @@ def test_pattern_add_dataset_domain_no_match(mock_datahub_graph): assert acryl_domain not in transformed_aspect.domains -def test_pattern_add_dataset_domain_replace_existing_match(mock_datahub_graph): +def test_pattern_add_dataset_domain_replace_existing_match(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pattern = "urn:li:dataset:\\(urn:li:dataPlatform:bigquery,.*" @@ -2590,7 +2580,7 @@ def test_pattern_add_dataset_domain_replace_existing_match(mock_datahub_graph): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = 
mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=PatternAddDatasetDomain, @@ -2614,7 +2604,9 @@ def test_pattern_add_dataset_domain_replace_existing_match(mock_datahub_graph): assert acryl_domain in transformed_aspect.domains -def test_pattern_add_dataset_domain_replace_existing_no_match(mock_datahub_graph): +def test_pattern_add_dataset_domain_replace_existing_no_match( + mock_datahub_graph_instance, +): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pattern = "urn:li:dataset:\\(urn:li:dataPlatform:invalid,.*" @@ -2622,7 +2614,7 @@ def test_pattern_add_dataset_domain_replace_existing_no_match(mock_datahub_graph pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=PatternAddDatasetDomain, @@ -2644,14 +2636,14 @@ def test_pattern_add_dataset_domain_replace_existing_no_match(mock_datahub_graph assert len(transformed_aspect.domains) == 0 -def test_pattern_add_dataset_domain_semantics_overwrite(mock_datahub_graph): +def test_pattern_add_dataset_domain_semantics_overwrite(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") server_domain = builder.make_domain_urn("test.io") pattern = "urn:li:dataset:\\(urn:li:dataPlatform:bigquery,.*" pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_domain(entity_urn: str) -> models.DomainsClass: @@ -2683,7 +2675,7 @@ def fake_get_domain(entity_urn: str) -> 
models.DomainsClass: def test_pattern_add_dataset_domain_semantics_patch( - pytestconfig, tmp_path, mock_time, mock_datahub_graph + pytestconfig, tmp_path, mock_time, mock_datahub_graph_instance ): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") @@ -2691,7 +2683,7 @@ def test_pattern_add_dataset_domain_semantics_patch( pattern = "urn:li:dataset:\\(urn:li:dataPlatform:bigquery,.*" pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_domain(entity_urn: str) -> models.DomainsClass: @@ -2723,9 +2715,11 @@ def fake_get_domain(entity_urn: str) -> models.DomainsClass: assert server_domain in transformed_aspect.domains -def test_simple_dataset_ownership_transformer_semantics_patch(mock_datahub_graph): +def test_simple_dataset_ownership_transformer_semantics_patch( + mock_datahub_graph_instance, +): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance server_owner: str = builder.make_owner_urn( "mohd@acryl.io", owner_type=builder.OwnerType.USER @@ -2783,7 +2777,9 @@ def fake_ownership_class(entity_urn: str) -> models.OwnershipClass: assert server_owner in owner_urns -def test_pattern_container_and_dataset_domain_transformation(mock_datahub_graph): +def test_pattern_container_and_dataset_domain_transformation( + mock_datahub_graph_instance, +): datahub_domain = builder.make_domain_urn("datahubproject.io") acryl_domain = builder.make_domain_urn("acryl_domain") server_domain = builder.make_domain_urn("server_domain") @@ -2807,7 +2803,7 @@ def fake_get_aspect( pipeline_context = PipelineContext( run_id="test_pattern_container_and_dataset_domain_transformation" ) - 
pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore with_domain_aspect = make_generic_dataset_mcp( @@ -2889,7 +2885,7 @@ def fake_get_aspect( def test_pattern_container_and_dataset_domain_transformation_with_no_container( - mock_datahub_graph, + mock_datahub_graph_instance, ): datahub_domain = builder.make_domain_urn("datahubproject.io") acryl_domain = builder.make_domain_urn("acryl_domain") @@ -2905,7 +2901,7 @@ def fake_get_aspect( pipeline_context = PipelineContext( run_id="test_pattern_container_and_dataset_domain_transformation_with_no_container" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore with_domain_aspect = make_generic_dataset_mcp( @@ -2955,7 +2951,7 @@ def fake_get_aspect( assert server_domain in second_domain_aspect.domains -def test_pattern_add_container_dataset_domain_no_match(mock_datahub_graph): +def test_pattern_add_container_dataset_domain_no_match(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") datahub_domain = builder.make_domain_urn("datahubproject.io") pattern = "urn:li:dataset:\\(urn:li:dataPlatform:invalid,.*" @@ -2963,7 +2959,7 @@ def test_pattern_add_container_dataset_domain_no_match(mock_datahub_graph): pipeline_context: PipelineContext = PipelineContext( run_id="test_simple_add_dataset_domain" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance def fake_get_aspect( entity_urn: str, @@ -3003,10 +2999,10 @@ def fake_get_aspect( def run_pattern_dataset_schema_terms_transformation_semantics( semantics: TransformerSemantics, - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], + mock_datahub_graph_instance: DataHubGraph, ) -> 
List[RecordEnvelope]: pipeline_context = PipelineContext(run_id="test_pattern_dataset_schema_terms") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # fake the server response def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: @@ -3113,10 +3109,10 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: def test_pattern_dataset_schema_terms_transformation_patch( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): output = run_pattern_dataset_schema_terms_transformation_semantics( - TransformerSemantics.PATCH, mock_datahub_graph + TransformerSemantics.PATCH, mock_datahub_graph_instance ) assert len(output) == 2 # Check that glossary terms were added. @@ -3146,10 +3142,10 @@ def test_pattern_dataset_schema_terms_transformation_patch( def test_pattern_dataset_schema_terms_transformation_overwrite( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): output = run_pattern_dataset_schema_terms_transformation_semantics( - TransformerSemantics.OVERWRITE, mock_datahub_graph + TransformerSemantics.OVERWRITE, mock_datahub_graph_instance ) assert len(output) == 2 @@ -3181,10 +3177,10 @@ def test_pattern_dataset_schema_terms_transformation_overwrite( def run_pattern_dataset_schema_tags_transformation_semantics( semantics: TransformerSemantics, - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], + mock_datahub_graph_instance: DataHubGraph, ) -> List[RecordEnvelope]: pipeline_context = PipelineContext(run_id="test_pattern_dataset_schema_terms") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # fake the server response def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: @@ -3284,10 +3280,10 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: def 
test_pattern_dataset_schema_tags_transformation_overwrite( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): output = run_pattern_dataset_schema_tags_transformation_semantics( - TransformerSemantics.OVERWRITE, mock_datahub_graph + TransformerSemantics.OVERWRITE, mock_datahub_graph_instance ) assert len(output) == 2 @@ -3318,10 +3314,10 @@ def test_pattern_dataset_schema_tags_transformation_overwrite( def test_pattern_dataset_schema_tags_transformation_patch( - mock_time, mock_datahub_graph + mock_time, mock_datahub_graph_instance ): output = run_pattern_dataset_schema_tags_transformation_semantics( - TransformerSemantics.PATCH, mock_datahub_graph + TransformerSemantics.PATCH, mock_datahub_graph_instance ) assert len(output) == 2 @@ -3542,9 +3538,11 @@ def fake_ownership_class(entity_urn: str) -> models.OwnershipClass: assert set(out_owners) == set(cleaned_owner_urn) -def test_clean_owner_urn_transformation_remove_fixed_string(mock_datahub_graph): +def test_clean_owner_urn_transformation_remove_fixed_string( + mock_datahub_graph_instance, +): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3581,9 +3579,11 @@ def test_clean_owner_urn_transformation_remove_fixed_string(mock_datahub_graph): _test_clean_owner_urns(pipeline_context, in_owner_urns, config, expected_owner_urns) -def test_clean_owner_urn_transformation_remove_multiple_values(mock_datahub_graph): +def test_clean_owner_urn_transformation_remove_multiple_values( + mock_datahub_graph_instance, +): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3620,9 +3620,11 @@ def 
test_clean_owner_urn_transformation_remove_multiple_values(mock_datahub_grap _test_clean_owner_urns(pipeline_context, in_owner_urns, config, expected_owner_urns) -def test_clean_owner_urn_transformation_remove_values_using_regex(mock_datahub_graph): +def test_clean_owner_urn_transformation_remove_values_using_regex( + mock_datahub_graph_instance, +): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3659,9 +3661,9 @@ def test_clean_owner_urn_transformation_remove_values_using_regex(mock_datahub_g _test_clean_owner_urns(pipeline_context, in_owner_urns, config, expected_owner_urns) -def test_clean_owner_urn_transformation_remove_digits(mock_datahub_graph): +def test_clean_owner_urn_transformation_remove_digits(mock_datahub_graph_instance): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3698,9 +3700,9 @@ def test_clean_owner_urn_transformation_remove_digits(mock_datahub_graph): _test_clean_owner_urns(pipeline_context, in_owner_urns, config, expected_owner_urns) -def test_clean_owner_urn_transformation_remove_pattern(mock_datahub_graph): +def test_clean_owner_urn_transformation_remove_pattern(mock_datahub_graph_instance): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3738,10 +3740,10 @@ def test_clean_owner_urn_transformation_remove_pattern(mock_datahub_graph): def test_clean_owner_urn_transformation_remove_word_in_capital_letters( - mock_datahub_graph, + mock_datahub_graph_instance, ): 
pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3781,10 +3783,10 @@ def test_clean_owner_urn_transformation_remove_word_in_capital_letters( def test_clean_owner_urn_transformation_remove_pattern_with_alphanumeric_value( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3822,10 +3824,10 @@ def test_clean_owner_urn_transformation_remove_pattern_with_alphanumeric_value( def test_clean_owner_urn_transformation_should_not_remove_system_identifier( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance user_emails = [ "ABCDEF:email_id@example.com", @@ -3850,12 +3852,12 @@ def test_clean_owner_urn_transformation_should_not_remove_system_identifier( def test_replace_external_url_word_replace( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=ReplaceExternalUrlDataset, @@ -3877,12 +3879,12 @@ def test_replace_external_url_word_replace( def test_replace_external_regex_replace_1( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url" ) - pipeline_context.graph = 
mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=ReplaceExternalUrlDataset, @@ -3904,12 +3906,12 @@ def test_replace_external_regex_replace_1( def test_replace_external_regex_replace_2( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_dataset_transformer_pipeline( transformer_type=ReplaceExternalUrlDataset, @@ -3931,12 +3933,12 @@ def test_replace_external_regex_replace_2( def test_pattern_cleanup_usage_statistics_user_1( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_pattern_cleanup_usage_statistics_user" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance TS_1 = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) @@ -3985,12 +3987,12 @@ def test_pattern_cleanup_usage_statistics_user_1( def test_pattern_cleanup_usage_statistics_user_2( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_pattern_cleanup_usage_statistics_user" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance TS_1 = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) @@ -4039,12 +4041,12 @@ def test_pattern_cleanup_usage_statistics_user_2( def test_pattern_cleanup_usage_statistics_user_3( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_pattern_cleanup_usage_statistics_user" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance TS_1 
= datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) @@ -4092,7 +4094,7 @@ def test_pattern_cleanup_usage_statistics_user_3( assert output[0].record.aspect.userCounts == expectedUsageStatistics.userCounts -def test_domain_mapping_based_on_tags_with_valid_tags(mock_datahub_graph): +def test_domain_mapping_based_on_tags_with_valid_tags(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") server_domain = builder.make_domain_urn("test.io") @@ -4103,7 +4105,7 @@ def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: return models.GlobalTagsClass(tags=[TagAssociationClass(tag=tag_one)]) pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_tags = fake_get_tags # type: ignore @@ -4126,13 +4128,15 @@ def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: assert server_domain not in transformed_aspect.domains -def test_domain_mapping_based_on_tags_with_no_matching_tags(mock_datahub_graph): +def test_domain_mapping_based_on_tags_with_no_matching_tags( + mock_datahub_graph_instance, +): acryl_domain = builder.make_domain_urn("acryl.io") server_domain = builder.make_domain_urn("test.io") non_matching_tag = builder.make_tag_urn("nonMatching") pipeline_context = PipelineContext(run_id="no_match_pipeline") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: @@ -4157,11 +4161,11 @@ def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: assert server_domain in transformed_aspect.domains -def test_domain_mapping_based_on_tags_with_empty_config(mock_datahub_graph): +def test_domain_mapping_based_on_tags_with_empty_config(mock_datahub_graph_instance): some_tag = 
builder.make_tag_urn("someTag") pipeline_context = PipelineContext(run_id="empty_config_pipeline") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: @@ -4180,7 +4184,9 @@ def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: assert len(output[0].record.aspect.domains) == 0 -def test_domain_mapping_based__r_on_tags_with_multiple_tags(mock_datahub_graph): +def test_domain_mapping_based__r_on_tags_with_multiple_tags( + mock_datahub_graph_instance, +): # Two tags that match different rules in the domain mapping configuration tag_one = builder.make_tag_urn("test:tag_1") tag_two = builder.make_tag_urn("test:tag_2") @@ -4189,7 +4195,7 @@ def test_domain_mapping_based__r_on_tags_with_multiple_tags(mock_datahub_graph): hr = builder.make_domain_urn("hr") pipeline_context = PipelineContext(run_id="multiple_matches_pipeline") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: @@ -4226,11 +4232,11 @@ def fake_get_domain(entity_urn: str) -> models.DomainsClass: assert len(transformed_aspect.domains) == 3 -def test_domain_mapping_based_on_tags_with_empty_tags(mock_datahub_graph): +def test_domain_mapping_based_on_tags_with_empty_tags(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") server_domain = builder.make_domain_urn("test.io") pipeline_context = PipelineContext(run_id="empty_config_pipeline") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: @@ -4254,11 +4260,11 @@ def 
fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: assert server_domain not in transformed_aspect.domains -def test_domain_mapping_based_on_tags_with_no_tags(mock_datahub_graph): +def test_domain_mapping_based_on_tags_with_no_tags(mock_datahub_graph_instance): acryl_domain = builder.make_domain_urn("acryl.io") server_domain = builder.make_domain_urn("test.io") pipeline_context = PipelineContext(run_id="empty_config_pipeline") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance # Return fake aspect to simulate server behaviour def fake_get_tags(entity_urn: str) -> Optional[models.GlobalTagsClass]: @@ -4282,7 +4288,7 @@ def fake_get_tags(entity_urn: str) -> Optional[models.GlobalTagsClass]: assert server_domain not in transformed_aspect.domains -def test_tags_to_terms_transformation(mock_datahub_graph): +def test_tags_to_terms_transformation(mock_datahub_graph_instance): # Create domain URNs for the test term_urn_example1 = builder.make_term_urn("example1") term_urn_example2 = builder.make_term_urn("example2") @@ -4349,7 +4355,7 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: ) pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_tags = fake_get_tags # type: ignore pipeline_context.graph.get_schema_metadata = fake_schema_metadata # type: ignore @@ -4383,7 +4389,7 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: } -def test_tags_to_terms_with_no_matching_terms(mock_datahub_graph): +def test_tags_to_terms_with_no_matching_terms(mock_datahub_graph_instance): # Setup for test where no tags match the provided term mappings def fake_get_tags_no_match(entity_urn: str) -> models.GlobalTagsClass: return models.GlobalTagsClass( @@ -4394,7 +4400,7 @@ def 
fake_get_tags_no_match(entity_urn: str) -> models.GlobalTagsClass: ) pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_tags = fake_get_tags_no_match # type: ignore # No matching terms in config @@ -4420,13 +4426,13 @@ def fake_get_tags_no_match(entity_urn: str) -> models.GlobalTagsClass: assert len(terms_aspect.terms) == 1 -def test_tags_to_terms_with_missing_tags(mock_datahub_graph): +def test_tags_to_terms_with_missing_tags(mock_datahub_graph_instance): # Setup for test where no tags are present def fake_get_no_tags(entity_urn: str) -> models.GlobalTagsClass: return models.GlobalTagsClass(tags=[]) pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_tags = fake_get_no_tags # type: ignore config = {"tags": ["example1", "example2"]} @@ -4451,7 +4457,7 @@ def fake_get_no_tags(entity_urn: str) -> models.GlobalTagsClass: assert len(terms_aspect.terms) == 1 -def test_tags_to_terms_with_partial_match(mock_datahub_graph): +def test_tags_to_terms_with_partial_match(mock_datahub_graph_instance): # Setup for partial match scenario def fake_get_partial_match_tags(entity_urn: str) -> models.GlobalTagsClass: return models.GlobalTagsClass( @@ -4466,7 +4472,7 @@ def fake_get_partial_match_tags(entity_urn: str) -> models.GlobalTagsClass: ) pipeline_context = PipelineContext(run_id="transformer_pipe_line") - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + pipeline_context.graph = mock_datahub_graph_instance pipeline_context.graph.get_tags = fake_get_partial_match_tags # type: ignore config = {"tags": ["example1"]} # Only 'example1' has a term mapped @@ -4493,12 +4499,12 @@ def fake_get_partial_match_tags(entity_urn: str) -> 
models.GlobalTagsClass: def test_replace_external_url_container_word_replace( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url_container" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_container_transformer_pipeline( transformer_type=ReplaceExternalUrlContainer, @@ -4521,12 +4527,12 @@ def test_replace_external_url_container_word_replace( def test_replace_external_regex_container_replace_1( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url_container" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_container_transformer_pipeline( transformer_type=ReplaceExternalUrlContainer, @@ -4549,12 +4555,12 @@ def test_replace_external_regex_container_replace_1( def test_replace_external_regex_container_replace_2( - mock_datahub_graph, + mock_datahub_graph_instance, ): pipeline_context: PipelineContext = PipelineContext( run_id="test_replace_external_url_container" ) - pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + pipeline_context.graph = mock_datahub_graph_instance output = run_container_transformer_pipeline( transformer_type=ReplaceExternalUrlContainer, diff --git a/smoke-test/pytest.ini b/smoke-test/pytest.ini index 61ce840fd713c..3762344cec4be 100644 --- a/smoke-test/pytest.ini +++ b/smoke-test/pytest.ini @@ -1,3 +1,6 @@ [pytest] markers = read_only: marks tests as read only (deselect with '-m "not read_only"') + ; no_cypress_suite0: main smoke tests; expressed as the negative of the others + no_cypress_suite1: main smoke tests, suite 1 + test_run_cypress: run cypress tests diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index 
c0df51dd9d98e..4436cf26c2fd7 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -65,10 +65,6 @@ def get_current_checkpoint_from_pipeline( "enabled": True, "remove_stale_metadata": True, "fail_safe_threshold": 100.0, - "state_provider": { - "type": "datahub", - "config": {"datahub_api": {"server": auth_session.gms_url()}}, - }, }, } From b8144699fdb37c3e935bef92b628d1d956e3f666 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 16 Oct 2024 19:18:32 -0700 Subject: [PATCH 39/50] chore(ingest): reorganize unit tests (#11636) --- .../tests/unit/{ => api}/test_apis.py | 0 .../test_entity_filter_report.py} | 0 .../tests/unit/{ => api}/test_pipeline.py | 10 +++---- .../unit/{ => api}/test_plugin_system.py | 0 .../{test_report.py => test_source_report.py} | 0 .../tests/unit/{ => api}/test_workunit.py | 0 .../{ => bigquery}/test_bigquery_lineage.py | 0 .../{ => bigquery}/test_bigquery_profiler.py | 0 .../{ => bigquery}/test_bigquery_source.py | 0 .../test_bigquery_sql_lineage.py | 0 .../{ => bigquery}/test_bigquery_usage.py | 0 .../test_bigqueryv2_usage_source.py | 0 .../test_bq_get_partition_range.py | 0 .../tests/unit/{ => cli}/test_check.py | 0 .../unit/{ => cli}/test_check_upgrade.py | 0 .../tests/unit/{ => cli}/test_cli_utils.py | 0 .../{ => config}/test_key_value_pattern.py | 0 .../tests/unit/glue/__init__.py | 0 .../tests/unit/{ => glue}/test_glue_source.py | 12 ++------ .../unit/{ => glue}/test_glue_source_stubs.py | 0 .../tests/unit/redshift/__init__.py | 0 .../{ => redshift}/redshift_query_mocker.py | 0 .../{ => redshift}/test_redshift_config.py | 0 .../{ => redshift}/test_redshift_lineage.py | 2 +- .../{ => redshift}/test_redshift_source.py | 0 .../tests/unit/sagemaker/__init__.py | 0 .../unit/sagemaker/test_sagemaker_source.py | 2 +- .../test_sagemaker_source_stubs.py | 0 .../tests/unit/{graph => sdk}/test_client.py | 0 .../unit/{ => sdk}/test_kafka_emitter.py | 0 .../tests/unit/{ => 
sdk}/test_mce_builder.py | 0 .../tests/unit/{ => sdk}/test_mcp_builder.py | 0 .../tests/unit/{ => sdk}/test_mcp_wrapper.py | 0 .../tests/unit/{ => sdk}/test_rest_emitter.py | 0 .../tests/unit/{ => serde}/test_codegen.py | 0 .../state}/test_ldap_state.py | 0 .../unit/{ => utilities}/test_cli_logging.py | 0 .../unit/{ => utilities}/test_ordered_set.py | 0 .../tests/unit/utilities/test_perf_timer.py | 28 ++++++++++--------- .../test_serialized_lru_cache.py | 0 .../{ => utilities}/test_topological_sort.py | 0 .../unit/{ => utilities}/test_utilities.py | 0 42 files changed, 25 insertions(+), 29 deletions(-) rename metadata-ingestion/tests/unit/{ => api}/test_apis.py (100%) rename metadata-ingestion/tests/unit/{test_report.py => api/test_entity_filter_report.py} (100%) rename metadata-ingestion/tests/unit/{ => api}/test_pipeline.py (97%) rename metadata-ingestion/tests/unit/{ => api}/test_plugin_system.py (100%) rename metadata-ingestion/tests/unit/api/{test_report.py => test_source_report.py} (100%) rename metadata-ingestion/tests/unit/{ => api}/test_workunit.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigquery_lineage.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigquery_profiler.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigquery_source.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigquery_sql_lineage.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigquery_usage.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bigqueryv2_usage_source.py (100%) rename metadata-ingestion/tests/unit/{ => bigquery}/test_bq_get_partition_range.py (100%) rename metadata-ingestion/tests/unit/{ => cli}/test_check.py (100%) rename metadata-ingestion/tests/unit/{ => cli}/test_check_upgrade.py (100%) rename metadata-ingestion/tests/unit/{ => cli}/test_cli_utils.py (100%) rename metadata-ingestion/tests/unit/{ => config}/test_key_value_pattern.py (100%) create mode 
100644 metadata-ingestion/tests/unit/glue/__init__.py rename metadata-ingestion/tests/unit/{ => glue}/test_glue_source.py (97%) rename metadata-ingestion/tests/unit/{ => glue}/test_glue_source_stubs.py (100%) create mode 100644 metadata-ingestion/tests/unit/redshift/__init__.py rename metadata-ingestion/tests/unit/{ => redshift}/redshift_query_mocker.py (100%) rename metadata-ingestion/tests/unit/{ => redshift}/test_redshift_config.py (100%) rename metadata-ingestion/tests/unit/{ => redshift}/test_redshift_lineage.py (99%) rename metadata-ingestion/tests/unit/{ => redshift}/test_redshift_source.py (100%) create mode 100644 metadata-ingestion/tests/unit/sagemaker/__init__.py rename metadata-ingestion/tests/unit/{ => sagemaker}/test_sagemaker_source_stubs.py (100%) rename metadata-ingestion/tests/unit/{graph => sdk}/test_client.py (100%) rename metadata-ingestion/tests/unit/{ => sdk}/test_kafka_emitter.py (100%) rename metadata-ingestion/tests/unit/{ => sdk}/test_mce_builder.py (100%) rename metadata-ingestion/tests/unit/{ => sdk}/test_mcp_builder.py (100%) rename metadata-ingestion/tests/unit/{ => sdk}/test_mcp_wrapper.py (100%) rename metadata-ingestion/tests/unit/{ => sdk}/test_rest_emitter.py (100%) rename metadata-ingestion/tests/unit/{ => serde}/test_codegen.py (100%) rename metadata-ingestion/tests/unit/{ => stateful_ingestion/state}/test_ldap_state.py (100%) rename metadata-ingestion/tests/unit/{ => utilities}/test_cli_logging.py (100%) rename metadata-ingestion/tests/unit/{ => utilities}/test_ordered_set.py (100%) rename metadata-ingestion/tests/unit/{ => utilities}/test_serialized_lru_cache.py (100%) rename metadata-ingestion/tests/unit/{ => utilities}/test_topological_sort.py (100%) rename metadata-ingestion/tests/unit/{ => utilities}/test_utilities.py (100%) diff --git a/metadata-ingestion/tests/unit/test_apis.py b/metadata-ingestion/tests/unit/api/test_apis.py similarity index 100% rename from metadata-ingestion/tests/unit/test_apis.py rename to 
metadata-ingestion/tests/unit/api/test_apis.py diff --git a/metadata-ingestion/tests/unit/test_report.py b/metadata-ingestion/tests/unit/api/test_entity_filter_report.py similarity index 100% rename from metadata-ingestion/tests/unit/test_report.py rename to metadata-ingestion/tests/unit/api/test_entity_filter_report.py diff --git a/metadata-ingestion/tests/unit/test_pipeline.py b/metadata-ingestion/tests/unit/api/test_pipeline.py similarity index 97% rename from metadata-ingestion/tests/unit/test_pipeline.py rename to metadata-ingestion/tests/unit/api/test_pipeline.py index a462f28136797..432d8e11c1c0b 100644 --- a/metadata-ingestion/tests/unit/test_pipeline.py +++ b/metadata-ingestion/tests/unit/api/test_pipeline.py @@ -224,9 +224,9 @@ def test_configure_with_file_sink_does_not_init_graph(self, mock_source, tmp_pat def test_run_including_fake_transformation(self): pipeline = Pipeline.create( { - "source": {"type": "tests.unit.test_pipeline.FakeSource"}, + "source": {"type": "tests.unit.api.test_pipeline.FakeSource"}, "transformers": [ - {"type": "tests.unit.test_pipeline.AddStatusRemovedTransformer"} + {"type": "tests.unit.api.test_pipeline.AddStatusRemovedTransformer"} ], "sink": {"type": "tests.test_helpers.sink_helpers.RecordingSink"}, "run_id": "pipeline_test", @@ -253,7 +253,7 @@ def test_run_including_registered_transformation(self): pipeline = Pipeline.create( { - "source": {"type": "tests.unit.test_pipeline.FakeSource"}, + "source": {"type": "tests.unit.api.test_pipeline.FakeSource"}, "transformers": [ { "type": "simple_add_dataset_ownership", @@ -297,7 +297,7 @@ def test_pipeline_return_code(self, tmp_path, source, strict_warnings, exit_code --- run_id: pipeline_test source: - type: tests.unit.test_pipeline.{source} + type: tests.unit.api.test_pipeline.{source} config: {{}} sink: type: console @@ -379,7 +379,7 @@ def test_pipeline_return_code(self, tmp_path, source, strict_warnings, exit_code def test_pipeline_process_commits(self, commit_policy, source, 
should_commit): pipeline = Pipeline.create( { - "source": {"type": f"tests.unit.test_pipeline.{source}"}, + "source": {"type": f"tests.unit.api.test_pipeline.{source}"}, "sink": {"type": "console"}, "run_id": "pipeline_test", } diff --git a/metadata-ingestion/tests/unit/test_plugin_system.py b/metadata-ingestion/tests/unit/api/test_plugin_system.py similarity index 100% rename from metadata-ingestion/tests/unit/test_plugin_system.py rename to metadata-ingestion/tests/unit/api/test_plugin_system.py diff --git a/metadata-ingestion/tests/unit/api/test_report.py b/metadata-ingestion/tests/unit/api/test_source_report.py similarity index 100% rename from metadata-ingestion/tests/unit/api/test_report.py rename to metadata-ingestion/tests/unit/api/test_source_report.py diff --git a/metadata-ingestion/tests/unit/test_workunit.py b/metadata-ingestion/tests/unit/api/test_workunit.py similarity index 100% rename from metadata-ingestion/tests/unit/test_workunit.py rename to metadata-ingestion/tests/unit/api/test_workunit.py diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigquery_lineage.py rename to metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py diff --git a/metadata-ingestion/tests/unit/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigquery_profiler.py rename to metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigquery_source.py rename to metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py diff --git a/metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py 
b/metadata-ingestion/tests/unit/bigquery/test_bigquery_sql_lineage.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py rename to metadata-ingestion/tests/unit/bigquery/test_bigquery_sql_lineage.py diff --git a/metadata-ingestion/tests/unit/test_bigquery_usage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigquery_usage.py rename to metadata-ingestion/tests/unit/bigquery/test_bigquery_usage.py diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py rename to metadata-ingestion/tests/unit/bigquery/test_bigqueryv2_usage_source.py diff --git a/metadata-ingestion/tests/unit/test_bq_get_partition_range.py b/metadata-ingestion/tests/unit/bigquery/test_bq_get_partition_range.py similarity index 100% rename from metadata-ingestion/tests/unit/test_bq_get_partition_range.py rename to metadata-ingestion/tests/unit/bigquery/test_bq_get_partition_range.py diff --git a/metadata-ingestion/tests/unit/test_check.py b/metadata-ingestion/tests/unit/cli/test_check.py similarity index 100% rename from metadata-ingestion/tests/unit/test_check.py rename to metadata-ingestion/tests/unit/cli/test_check.py diff --git a/metadata-ingestion/tests/unit/test_check_upgrade.py b/metadata-ingestion/tests/unit/cli/test_check_upgrade.py similarity index 100% rename from metadata-ingestion/tests/unit/test_check_upgrade.py rename to metadata-ingestion/tests/unit/cli/test_check_upgrade.py diff --git a/metadata-ingestion/tests/unit/test_cli_utils.py b/metadata-ingestion/tests/unit/cli/test_cli_utils.py similarity index 100% rename from metadata-ingestion/tests/unit/test_cli_utils.py rename to metadata-ingestion/tests/unit/cli/test_cli_utils.py diff --git 
a/metadata-ingestion/tests/unit/test_key_value_pattern.py b/metadata-ingestion/tests/unit/config/test_key_value_pattern.py similarity index 100% rename from metadata-ingestion/tests/unit/test_key_value_pattern.py rename to metadata-ingestion/tests/unit/config/test_key_value_pattern.py diff --git a/metadata-ingestion/tests/unit/glue/__init__.py b/metadata-ingestion/tests/unit/glue/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/test_glue_source.py b/metadata-ingestion/tests/unit/glue/test_glue_source.py similarity index 97% rename from metadata-ingestion/tests/unit/test_glue_source.py rename to metadata-ingestion/tests/unit/glue/test_glue_source.py index eb1e7f3fe41d9..57f48db1129c4 100644 --- a/metadata-ingestion/tests/unit/test_glue_source.py +++ b/metadata-ingestion/tests/unit/glue/test_glue_source.py @@ -35,7 +35,7 @@ validate_all_providers_have_committed_successfully, ) from tests.test_helpers.type_helpers import PytestConfig -from tests.unit.test_glue_source_stubs import ( +from tests.unit.glue.test_glue_source_stubs import ( databases_1, databases_2, get_bucket_tagging, @@ -71,6 +71,8 @@ GMS_PORT = 8080 GMS_SERVER = f"http://localhost:{GMS_PORT}" +test_resources_dir = Path(__file__).parent + def glue_source( platform_instance: Optional[str] = None, @@ -247,7 +249,6 @@ def test_glue_ingest( write_metadata_file(tmp_path / mce_file, mce_objects) # Verify the output. 
- test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / mce_file, @@ -312,8 +313,6 @@ def test_config_without_platform(): @freeze_time(FROZEN_TIME) def test_glue_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph): - test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" - deleted_actor_golden_mcs = "{}/glue_deleted_actor_mces_golden.json".format( test_resources_dir ) @@ -438,7 +437,6 @@ def test_glue_with_delta_schema_ingest( write_metadata_file(tmp_path / "glue_delta_mces.json", mce_objects) # Verify the output. - test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "glue_delta_mces.json", @@ -475,7 +473,6 @@ def test_glue_with_malformed_delta_schema_ingest( write_metadata_file(tmp_path / "glue_malformed_delta_mces.json", mce_objects) # Verify the output. - test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "glue_malformed_delta_mces.json", @@ -571,7 +568,6 @@ def test_glue_ingest_include_table_lineage( write_metadata_file(tmp_path / mce_file, mce_objects) # Verify the output. - test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / mce_file, @@ -678,7 +674,6 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: write_metadata_file(tmp_path / mce_file, mce_objects) # Verify the output. - test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / mce_file, @@ -716,7 +711,6 @@ def test_glue_ingest_with_profiling( write_metadata_file(tmp_path / mce_file, mce_objects) # Verify the output. 
- test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / mce_file, diff --git a/metadata-ingestion/tests/unit/test_glue_source_stubs.py b/metadata-ingestion/tests/unit/glue/test_glue_source_stubs.py similarity index 100% rename from metadata-ingestion/tests/unit/test_glue_source_stubs.py rename to metadata-ingestion/tests/unit/glue/test_glue_source_stubs.py diff --git a/metadata-ingestion/tests/unit/redshift/__init__.py b/metadata-ingestion/tests/unit/redshift/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/redshift_query_mocker.py b/metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py similarity index 100% rename from metadata-ingestion/tests/unit/redshift_query_mocker.py rename to metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py diff --git a/metadata-ingestion/tests/unit/test_redshift_config.py b/metadata-ingestion/tests/unit/redshift/test_redshift_config.py similarity index 100% rename from metadata-ingestion/tests/unit/test_redshift_config.py rename to metadata-ingestion/tests/unit/redshift/test_redshift_config.py diff --git a/metadata-ingestion/tests/unit/test_redshift_lineage.py b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py similarity index 99% rename from metadata-ingestion/tests/unit/test_redshift_lineage.py rename to metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py index 78b7169a93f3c..2e3eb8fde1292 100644 --- a/metadata-ingestion/tests/unit/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py @@ -26,7 +26,7 @@ SqlParsingDebugInfo, SqlParsingResult, ) -from tests.unit.redshift_query_mocker import mock_cursor +from tests.unit.redshift.redshift_query_mocker import mock_cursor def test_get_sources_from_query(): diff --git a/metadata-ingestion/tests/unit/test_redshift_source.py 
b/metadata-ingestion/tests/unit/redshift/test_redshift_source.py similarity index 100% rename from metadata-ingestion/tests/unit/test_redshift_source.py rename to metadata-ingestion/tests/unit/redshift/test_redshift_source.py diff --git a/metadata-ingestion/tests/unit/sagemaker/__init__.py b/metadata-ingestion/tests/unit/sagemaker/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py index 995d176c213b2..2450e6fa8fe56 100644 --- a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py +++ b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py @@ -14,7 +14,7 @@ job_types, ) from tests.test_helpers import mce_helpers -from tests.unit.test_sagemaker_source_stubs import ( +from tests.unit.sagemaker.test_sagemaker_source_stubs import ( describe_endpoint_response_1, describe_endpoint_response_2, describe_feature_group_response_1, diff --git a/metadata-ingestion/tests/unit/test_sagemaker_source_stubs.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source_stubs.py similarity index 100% rename from metadata-ingestion/tests/unit/test_sagemaker_source_stubs.py rename to metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source_stubs.py diff --git a/metadata-ingestion/tests/unit/graph/test_client.py b/metadata-ingestion/tests/unit/sdk/test_client.py similarity index 100% rename from metadata-ingestion/tests/unit/graph/test_client.py rename to metadata-ingestion/tests/unit/sdk/test_client.py diff --git a/metadata-ingestion/tests/unit/test_kafka_emitter.py b/metadata-ingestion/tests/unit/sdk/test_kafka_emitter.py similarity index 100% rename from metadata-ingestion/tests/unit/test_kafka_emitter.py rename to metadata-ingestion/tests/unit/sdk/test_kafka_emitter.py diff --git a/metadata-ingestion/tests/unit/test_mce_builder.py b/metadata-ingestion/tests/unit/sdk/test_mce_builder.py 
similarity index 100% rename from metadata-ingestion/tests/unit/test_mce_builder.py rename to metadata-ingestion/tests/unit/sdk/test_mce_builder.py diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/sdk/test_mcp_builder.py similarity index 100% rename from metadata-ingestion/tests/unit/test_mcp_builder.py rename to metadata-ingestion/tests/unit/sdk/test_mcp_builder.py diff --git a/metadata-ingestion/tests/unit/test_mcp_wrapper.py b/metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py similarity index 100% rename from metadata-ingestion/tests/unit/test_mcp_wrapper.py rename to metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py diff --git a/metadata-ingestion/tests/unit/test_rest_emitter.py b/metadata-ingestion/tests/unit/sdk/test_rest_emitter.py similarity index 100% rename from metadata-ingestion/tests/unit/test_rest_emitter.py rename to metadata-ingestion/tests/unit/sdk/test_rest_emitter.py diff --git a/metadata-ingestion/tests/unit/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py similarity index 100% rename from metadata-ingestion/tests/unit/test_codegen.py rename to metadata-ingestion/tests/unit/serde/test_codegen.py diff --git a/metadata-ingestion/tests/unit/test_ldap_state.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_ldap_state.py similarity index 100% rename from metadata-ingestion/tests/unit/test_ldap_state.py rename to metadata-ingestion/tests/unit/stateful_ingestion/state/test_ldap_state.py diff --git a/metadata-ingestion/tests/unit/test_cli_logging.py b/metadata-ingestion/tests/unit/utilities/test_cli_logging.py similarity index 100% rename from metadata-ingestion/tests/unit/test_cli_logging.py rename to metadata-ingestion/tests/unit/utilities/test_cli_logging.py diff --git a/metadata-ingestion/tests/unit/test_ordered_set.py b/metadata-ingestion/tests/unit/utilities/test_ordered_set.py similarity index 100% rename from 
metadata-ingestion/tests/unit/test_ordered_set.py rename to metadata-ingestion/tests/unit/utilities/test_ordered_set.py diff --git a/metadata-ingestion/tests/unit/utilities/test_perf_timer.py b/metadata-ingestion/tests/unit/utilities/test_perf_timer.py index 6129b3e37d8bc..1de76a32fb708 100644 --- a/metadata-ingestion/tests/unit/utilities/test_perf_timer.py +++ b/metadata-ingestion/tests/unit/utilities/test_perf_timer.py @@ -10,37 +10,39 @@ def test_perf_timer_simple(): with PerfTimer() as timer: - time.sleep(1) - assert approx(timer.elapsed_seconds()) == 1 + time.sleep(0.4) + assert approx(timer.elapsed_seconds()) == 0.4 - assert approx(timer.elapsed_seconds()) == 1 + assert approx(timer.elapsed_seconds()) == 0.4 def test_perf_timer_paused_timer(): with PerfTimer() as current_timer: - time.sleep(1) - assert approx(current_timer.elapsed_seconds()) == 1 + time.sleep(0.5) + assert approx(current_timer.elapsed_seconds()) == 0.5 with current_timer.pause(): - time.sleep(2) - assert approx(current_timer.elapsed_seconds()) == 1 - assert approx(current_timer.elapsed_seconds()) == 1 - time.sleep(1) + time.sleep(0.3) + assert approx(current_timer.elapsed_seconds()) == 0.5 + assert approx(current_timer.elapsed_seconds()) == 0.5 + time.sleep(0.2) - assert approx(current_timer.elapsed_seconds()) == 2 + assert approx(current_timer.elapsed_seconds()) == 0.7 def test_generator_with_paused_timer(): + n = 4 + def generator_function(): with PerfTimer() as inner_timer: time.sleep(1) - for i in range(10): + for i in range(n): time.sleep(0.2) with inner_timer.pause(): time.sleep(0.2) yield i - assert approx(inner_timer.elapsed_seconds()) == 1 + 0.2 * 10 + assert approx(inner_timer.elapsed_seconds()) == 1 + 0.2 * n with PerfTimer() as outer_timer: seq = generator_function() list([i for i in seq]) - assert approx(outer_timer.elapsed_seconds()) == 1 + 0.2 * 10 + 0.2 * 10 + assert approx(outer_timer.elapsed_seconds()) == 1 + 0.2 * n + 0.2 * n diff --git 
a/metadata-ingestion/tests/unit/test_serialized_lru_cache.py b/metadata-ingestion/tests/unit/utilities/test_serialized_lru_cache.py similarity index 100% rename from metadata-ingestion/tests/unit/test_serialized_lru_cache.py rename to metadata-ingestion/tests/unit/utilities/test_serialized_lru_cache.py diff --git a/metadata-ingestion/tests/unit/test_topological_sort.py b/metadata-ingestion/tests/unit/utilities/test_topological_sort.py similarity index 100% rename from metadata-ingestion/tests/unit/test_topological_sort.py rename to metadata-ingestion/tests/unit/utilities/test_topological_sort.py diff --git a/metadata-ingestion/tests/unit/test_utilities.py b/metadata-ingestion/tests/unit/utilities/test_utilities.py similarity index 100% rename from metadata-ingestion/tests/unit/test_utilities.py rename to metadata-ingestion/tests/unit/utilities/test_utilities.py From 8b42ac8cdebef89f8aff0ac030c72db9c54aad3d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 16 Oct 2024 20:47:48 -0700 Subject: [PATCH 40/50] fix(ingest): run sqllineage in process by default (#11650) --- .../ingestion/source/looker/lookml_config.py | 3 - .../src/datahub/ingestion/source/redash.py | 23 +- .../src/datahub/utilities/sql_parser.py | 2 +- .../lookml/lookml_mces_badsql_parser.json | 2820 ----------------- .../tests/integration/lookml/test_lookml.py | 48 - 5 files changed, 13 insertions(+), 2883 deletions(-) delete mode 100644 metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index 0bcee14ec77a1..da837da161386 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -124,9 +124,6 @@ class LookMLSourceConfig( description="List of regex patterns for LookML views to include in the extraction.", ) 
parse_table_names_from_sql: bool = Field(True, description="See note below.") - sql_parser: str = Field( - "datahub.utilities.sql_parser.DefaultSQLParser", description="See note below." - ) api: Optional[LookerAPIConfig] project_name: Optional[str] = Field( None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index 38cf0bebcbc12..5fd63e7f93f92 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -2,7 +2,6 @@ import math import sys from dataclasses import dataclass, field -from multiprocessing.pool import ThreadPool from typing import Dict, Iterable, List, Optional, Set, Type import dateutil.parser as dp @@ -43,6 +42,7 @@ from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.sql_parser import SQLParser +from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -646,11 +646,11 @@ def _emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: self.report.total_dashboards = total_dashboards self.report.max_page_dashboards = max_page - dash_exec_pool = ThreadPool(self.config.parallelism) - for response in dash_exec_pool.imap_unordered( - self._process_dashboard_response, range(1, max_page + 1) - ): - yield from response + yield from ThreadedIteratorExecutor.process( + self._process_dashboard_response, + [(page,) for page in range(1, max_page + 1)], + max_workers=self.config.parallelism, + ) def _get_chart_type_from_viz_data(self, viz_data: Dict) -> str: """ @@ -769,11 +769,12 @@ def _emit_chart_mces(self) -> Iterable[MetadataWorkUnit]: logger.info(f"/api/queries total count {total_queries} and max page {max_page}") self.report.total_queries = total_queries self.report.max_page_queries = max_page - chart_exec_pool = 
ThreadPool(self.config.parallelism) - for response in chart_exec_pool.imap_unordered( - self._process_query_response, range(1, max_page + 1) - ): - yield from response + + yield from ThreadedIteratorExecutor.process( + self._process_query_response, + [(page,) for page in range(1, max_page + 1)], + max_workers=self.config.parallelism, + ) def add_config_to_report(self) -> None: self.report.api_page_limit = self.config.api_page_limit diff --git a/metadata-ingestion/src/datahub/utilities/sql_parser.py b/metadata-ingestion/src/datahub/utilities/sql_parser.py index 61693b52b350f..b88f8fd8c7302 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_parser.py +++ b/metadata-ingestion/src/datahub/utilities/sql_parser.py @@ -46,7 +46,7 @@ class SqlLineageSQLParser(SQLParser): def __init__( self, sql_query: str, - use_external_process: bool = True, + use_external_process: bool = False, use_raw_names: bool = False, ) -> None: super().__init__(sql_query, use_external_process) diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json deleted file mode 100644 index 5b39e8dd96ac2..0000000000000 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ /dev/null @@ -1,2820 +0,0 @@ -[ -{ - "entityType": "container", - "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "looker", - "env": "PROD", - "project_name": "lkml_samples" - }, - "name": "lkml_samples", - "env": "PROD" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, 
- "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:looker" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "LookML Project" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Folders" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n is_latest,\n country,\n city,\n timestamp,\n measurement\n FROM\n my_table", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, 
- "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),city)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),is_latest)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),timestamp)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),timestamp)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),measurement)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),average_measurement)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "The country", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "City", - "label": "", - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "is_latest", - "nullable": false, - "description": "Is latest data", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BooleanType": {} - } - }, - "nativeDataType": "yesno", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "timestamp", - "nullable": false, - "description": "Timestamp of measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "time", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "average_measurement", - "nullable": false, - "description": "My measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "average", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "foo.view.lkml", - "looker.model": "data" - }, - "name": "my_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - 
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - 
"time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),timestamp)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),timestamp)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),measurement)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),average_measurement)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_derived_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": 
"", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "The country", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "City", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "timestamp", - "nullable": false, - "description": "Timestamp of measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "time", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "average_measurement", - "nullable": false, - "description": "My measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "average", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "bar.view.lkml", - "looker.model": "data" - }, - "name": "my_derived_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "aspects": [ - { - 
"com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "included_view_file.view.lkml", - "looker.model": "data" - }, - "name": "include_able_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - 
"viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml", - "looker.model": "data" - }, - "name": "looker_events", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - 
"lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD),additional_measure)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "extending_looker_events", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "additional_measure", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": 
false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml", - "looker.model": "data" - }, - "name": "extending_looker_events", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" 
- } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.autodetect_sql_name_based_on_view_name,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml", - "looker.model": "data" - }, - "name": "autodetect_sql_name_based_on_view_name", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml", - "looker.model": "data" - }, - "name": "test_include_external_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT date AS DATE,\n platform AS aliased_platform,\n country", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/nested" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD),date)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD),date)" - ], - "confidenceScore": 1.0 - }, - { - 
"upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD),platform)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD),aliased_platform)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD),country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD),country)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "fragment_derived_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "date", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "aliased_platform", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "country", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "isPartOfKey": false - } - 
], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "nested/fragment_derived.view.lkml", - "looker.model": "data" - }, - "name": "fragment_derived_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),customer_id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),customer_id)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),sale_price)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),lifetime_spend)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "customer_facts", - "platform": "urn:li:dataPlatform:looker", - 
"version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer_id", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "lifetime_spend", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "liquid.view.lkml", - "looker.model": "data" - }, - "name": "customer_facts", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, 
- "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD),pk)" - ], - "downstreamType": "FIELD", - "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD),count)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "ability", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "pk", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "count", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "ability.view.lkml", - "looker.model": "data" - }, - "name": "ability", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "aspects": [ - { - 
"com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.owners,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.owners,PROD),id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),id)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.owners,PROD),owner_name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),owner_name)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "owners", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": true - }, - { - 
"fieldPath": "owner_name", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [ - "id" - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "owners.view.lkml", - "looker.model": "data" - }, - "name": "owners", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city 
{\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),country)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.city)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "view_derived_explore", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "", - "label": "", - "type": { - 
"type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "unique_countries", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count_distinct", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "derived_col", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "sum", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "native_derived_table.view.lkml", - "looker.model": "data" - }, - "name": "view_derived_explore", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": 
"subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD)", - "type": "VIEW" - 
} - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.flightstats.accidents,PROD),id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "flights", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "nullable": false, - "description": "", - "label": "id", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": true - } - ], - "primaryKeys": [ - "id" - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "flights.view.lkml", - "looker.model": "data" - }, - "name": "flights", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": 
"lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { - "urn": "urn:li:tag:Dimension", - "aspects": [ - { - "com.linkedin.pegasus2avro.tag.TagProperties": { - "name": "Dimension", - "description": "A tag that is applied to all dimension fields." - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { - "urn": "urn:li:tag:Temporal", - "aspects": [ - { - "com.linkedin.pegasus2avro.tag.TagProperties": { - "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { - "urn": "urn:li:tag:Measure", - "aspects": [ - { - "com.linkedin.pegasus2avro.tag.TagProperties": { - "name": "Measure", - "description": "A tag that is applied to all measures (metrics). 
Measures are typically the columns that you aggregate on" - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Dimension", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Measure", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Temporal", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index e4eb564e3e86b..94b3b103d0548 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -10,7 +10,6 @@ from freezegun import freeze_time from looker_sdk.sdk.api40.models import DBConnection -from datahub.configuration.common import PipelineExecutionError from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.file import read_metadata_file from datahub.ingestion.source.looker.looker_template_language import ( @@ -518,53 +517,6 @@ def ingestion_test( ) -@freeze_time(FROZEN_TIME) -def test_lookml_bad_sql_parser(pytestconfig, tmp_path, mock_time): - """Incorrect specification of sql parser should not fail ingestion""" - 
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" - mce_out = "lookml_mces_badsql_parser.json" - pipeline = Pipeline.create( - { - "run_id": "lookml-test", - "source": { - "type": "lookml", - "config": { - "base_folder": str(test_resources_dir / "lkml_samples"), - "connection_to_platform_map": { - "my_connection": { - "platform": "snowflake", - "default_db": "default_db", - "default_schema": "default_schema", - } - }, - "parse_table_names_from_sql": True, - "project_name": "lkml_samples", - "sql_parser": "bad.sql.Parser", - "emit_reachable_views_only": False, - "process_refinements": False, - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/{mce_out}", - }, - }, - } - ) - pipeline.run() - pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=False) - with pytest.raises(PipelineExecutionError): # we expect the source to have warnings - pipeline.raise_from_status(raise_warnings=True) - - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / mce_out, - golden_path=test_resources_dir / mce_out, - ) - - @freeze_time(FROZEN_TIME) def test_lookml_git_info(pytestconfig, tmp_path, mock_time): """Add github info to config""" From 6b09346ca554783906afd8901ba9f89e1ef7310d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 17 Oct 2024 08:14:06 -0700 Subject: [PATCH 41/50] feat(ingest): add offline flag to SQL parser CLI (#11635) --- .../src/datahub/cli/check_cli.py | 35 +++++++++++++++---- .../datahub/sql_parsing/sqlglot_lineage.py | 4 ++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index 6e9bfddd350f9..39ed1b2bfea08 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -188,9 +188,13 @@ def sql_format(sql: str, platform: str) -> None: @click.option( "--sql", type=str, - required=True, help="The SQL query 
to parse", ) +@click.option( + "--sql-file", + type=click.Path(exists=True, dir_okay=False, readable=True), + help="The SQL file to parse", +) @click.option( "--platform", type=str, @@ -218,25 +222,44 @@ def sql_format(sql: str, platform: str) -> None: type=str, help="The default schema to use for unqualified table names", ) +@click.option( + "--online/--offline", + type=bool, + is_flag=True, + default=True, + help="Run in offline mode and disable schema-aware parsing.", +) @telemetry.with_telemetry() def sql_lineage( - sql: str, + sql: Optional[str], + sql_file: Optional[str], platform: str, default_db: Optional[str], default_schema: Optional[str], platform_instance: Optional[str], env: str, + online: bool, ) -> None: """Parse the lineage of a SQL query. - This performs schema-aware parsing in order to generate column-level lineage. - If the relevant tables are not in DataHub, this will be less accurate. + In online mode (the default), we perform schema-aware parsing in order to generate column-level lineage. + If offline mode is enabled or if the relevant tables are not in DataHub, this will be less accurate. 
""" - graph = get_default_graph() + from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result + + if sql is None: + if sql_file is None: + raise click.UsageError("Either --sql or --sql-file must be provided") + sql = pathlib.Path(sql_file).read_text() + + graph = None + if online: + graph = get_default_graph() - lineage = graph.parse_sql_lineage( + lineage = create_lineage_sql_parsed_result( sql, + graph=graph, platform=platform, platform_instance=platform_instance, env=env, diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index 0806d0ec774fe..273e9d0f9f0b1 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -656,7 +656,9 @@ def _get_direct_raw_col_upstreams( # Parse the column name out of the node name. # Sqlglot calls .sql(), so we have to do the inverse. normalized_col = sqlglot.parse_one(node.name).this.name - if node.subfield: + if hasattr(node, "subfield") and node.subfield: + # The hasattr check is necessary, since it lets us be compatible with + # sqlglot versions that don't have the subfield attribute. 
normalized_col = f"{normalized_col}.{node.subfield}" direct_raw_col_upstreams.add( From 68cd17b34eb404421898cad4ab26ecab2564a637 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 17 Oct 2024 10:08:37 -0700 Subject: [PATCH 42/50] fix(ingest/redshift): reduce sequence limit for LISTAGG (#11621) Co-authored-by: treff7es Co-authored-by: Aseem Bansal --- .../ingestion/source/redshift/query.py | 281 +++++++++--------- .../unit/redshift/redshift_query_mocker.py | 40 +-- 2 files changed, 143 insertions(+), 178 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index 39370b93b561c..f7fad574f7fbe 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -4,6 +4,12 @@ redshift_datetime_format = "%Y-%m-%d %H:%M:%S" +# See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext +# for why we need to limit the size of the query text. +# We use 290 instead of the standard 320, because escape characters can add to the length.
+_QUERY_SEQUENCE_LIMIT = 290 + + class RedshiftCommonQuery: CREATE_TEMP_TABLE_CLAUSE = "create temp table" CREATE_TEMPORARY_TABLE_CLAUSE = "create temporary table" @@ -487,71 +493,70 @@ def list_unload_commands_sql( def list_insert_create_queries_sql( db_name: str, start_time: datetime, end_time: datetime ) -> str: - return """ - with query_txt as - ( - select - query, - pid, - LISTAGG(case - when LEN(RTRIM(text)) = 0 then text - else RTRIM(text) - end) within group ( - order by - sequence) as ddl - from - ( - select - query, - pid, - text, - sequence - from - STL_QUERYTEXT - where - sequence < 320 - order by - sequence - ) - group by - query, - pid - ) - select - distinct tbl as target_table_id, - sti.schema as target_schema, - sti.table as target_table, - sti.database as cluster, - usename as username, - ddl, - sq.query as query_id, - min(si.starttime) as timestamp, - ANY_VALUE(pid) as session_id - from - stl_insert as si - left join SVV_TABLE_INFO sti on - sti.table_id = tbl - left join svl_user_info sui on - si.userid = sui.usesysid - left join query_txt sq on - si.query = sq.query - left join stl_load_commits slc on - slc.query = si.query - where - sui.usename <> 'rdsdb' - and cluster = '{db_name}' - and slc.query IS NULL - and si.starttime >= '{start_time}' - and si.starttime < '{end_time}' - group by - target_table_id, - target_schema, - target_table, - cluster, - username, - ddl, - sq.query + return """\ +with query_txt as ( + select + query, + pid, + LISTAGG(case + when LEN(RTRIM(text)) = 0 then text + else RTRIM(text) + end) within group ( + order by sequence + ) as ddl + from ( + select + query, + pid, + text, + sequence + from + STL_QUERYTEXT + where + sequence < {_QUERY_SEQUENCE_LIMIT} + order by + sequence + ) + group by + query, + pid +) +select + distinct tbl as target_table_id, + sti.schema as target_schema, + sti.table as target_table, + sti.database as cluster, + usename as username, + ddl, + sq.query as query_id, + min(si.starttime) as 
timestamp, + ANY_VALUE(pid) as session_id +from + stl_insert as si +left join SVV_TABLE_INFO sti on + sti.table_id = tbl +left join svl_user_info sui on + si.userid = sui.usesysid +left join query_txt sq on + si.query = sq.query +left join stl_load_commits slc on + slc.query = si.query +where + sui.usename <> 'rdsdb' + and cluster = '{db_name}' + and slc.query IS NULL + and si.starttime >= '{start_time}' + and si.starttime < '{end_time}' +group by + target_table_id, + target_schema, + target_table, + cluster, + username, + ddl, + sq.query """.format( + _QUERY_SEQUENCE_LIMIT=_QUERY_SEQUENCE_LIMIT, # We need the original database name for filtering db_name=db_name, start_time=start_time.strftime(redshift_datetime_format), @@ -564,84 +569,82 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: end_time_str: str = end_time.strftime(redshift_datetime_format) - return rf"""-- DataHub Redshift Source temp table DDL query + return rf"""\ +-- DataHub Redshift Source temp table DDL query +select + * +from ( + select + session_id, + transaction_id, + start_time, + userid, + REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\n','\\n'), '(CREATE(?:[\\n\\s\\t]+(?:temp|temporary))?(?:[\\n\\s\\t]+)table(?:[\\n\\s\\t]+)[^\\n\\s\\t()-]+)', 0, 1, 'ipe'),'[\\n\\s\\t]+',' ',1,'p') as create_command, + query_text, + row_number() over ( + partition by session_id, TRIM(query_text) + order by start_time desc + ) rn + from ( + select + pid as session_id, + xid as transaction_id, + starttime as start_time, + type, + query_text, + userid + from ( select - * + starttime, + pid, + xid, + type, + userid, + LISTAGG(case + when LEN(RTRIM(text)) = 0 then text + else RTRIM(text) + end, + '') within group ( + order by sequence + ) as query_text from - ( - select - session_id, - transaction_id, - start_time, - userid, - REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\n','\\n'), 
'(CREATE(?:[\\n\\s\\t]+(?:temp|temporary))?(?:[\\n\\s\\t]+)table(?:[\\n\\s\\t]+)[^\\n\\s\\t()-]+)', 0, 1, 'ipe'),'[\\n\\s\\t]+',' ',1,'p') as create_command, - query_text, - row_number() over ( - partition by session_id, TRIM(query_text) - order by start_time desc - ) rn - from - ( - select - pid as session_id, - xid as transaction_id, - starttime as start_time, - type, - query_text, - userid - from - ( - select - starttime, - pid, - xid, - type, - userid, - LISTAGG(case - when LEN(RTRIM(text)) = 0 then text - else RTRIM(text) - end, - '') within group ( - order by sequence - ) as query_text - from - SVL_STATEMENTTEXT - where - type in ('DDL', 'QUERY') - AND starttime >= '{start_time_str}' - AND starttime < '{end_time_str}' - -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext - AND sequence < 320 - group by - starttime, - pid, - xid, - type, - userid - order by - starttime, - pid, - xid, - type, - userid - asc) - where - type in ('DDL', 'QUERY') - ) - where - (create_command ilike 'create temp table %' - or create_command ilike 'create temporary table %' - -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction - or create_command ilike 'create table %') - -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out. 
- and query_text not ilike 'CREATE TEMP TABLE volt_tt_%' - and create_command not like 'CREATE TEMP TABLE volt_tt_' - -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query - and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%' - - ) + SVL_STATEMENTTEXT where - rn = 1 + type in ('DDL', 'QUERY') + AND starttime >= '{start_time_str}' + AND starttime < '{end_time_str}' + AND sequence < {_QUERY_SEQUENCE_LIMIT} + group by + starttime, + pid, + xid, + type, + userid + order by + starttime, + pid, + xid, + type, + userid + asc + ) + where + type in ('DDL', 'QUERY') + ) + where + (create_command ilike 'create temp table %' + or create_command ilike 'create temporary table %' + -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction + or create_command ilike 'create table %') + -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out. 
+ and query_text not ilike 'CREATE TEMP TABLE volt_tt_%' + and create_command not like 'CREATE TEMP TABLE volt_tt_' + -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query + and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%' + +) +where + rn = 1 """ # Add this join to the sql query for more metrics on completed queries diff --git a/metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py b/metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py index ada76e624032b..06b592d42914b 100644 --- a/metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py +++ b/metadata-ingestion/tests/unit/redshift/redshift_query_mocker.py @@ -56,45 +56,7 @@ def mock_stl_insert_table_cursor(cursor: MagicMock) -> None: query_vs_cursor_mocker = { ( - "-- DataHub Redshift Source temp table DDL query\n select\n *\n " - "from\n (\n select\n session_id,\n " - " transaction_id,\n start_time,\n userid,\n " - " REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:[" - "\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[" - "^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n " - " query_text,\n row_number() over (\n partition " - "by session_id, TRIM(query_text)\n order by start_time desc\n ) rn\n " - " from\n (\n select\n pid " - "as session_id,\n xid as transaction_id,\n starttime " - "as start_time,\n type,\n query_text,\n " - " userid\n from\n (\n " - "select\n starttime,\n pid,\n " - " xid,\n type,\n userid,\n " - " LISTAGG(case\n when LEN(RTRIM(text)) = 0 then text\n " - " else RTRIM(text)\n end,\n " - " '') within group (\n order by sequence\n " - " ) as query_text\n from\n " - "SVL_STATEMENTTEXT\n where\n type in ('DDL', " - "'QUERY')\n AND starttime >= '2024-01-01 12:00:00'\n " - " AND starttime < '2024-01-10 
12:00:00'\n -- See " - "https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl" - "-statementtext\n AND sequence < 320\n group by\n " - " starttime,\n pid,\n " - "xid,\n type,\n userid\n " - " order by\n starttime,\n pid,\n " - " xid,\n type,\n userid\n " - " asc)\n where\n type in ('DDL', " - "'QUERY')\n )\n where\n (create_command ilike " - "'create temp table %'\n or create_command ilike 'create temporary table %'\n " - " -- we want to get all the create table statements and not just temp tables " - "if non temp table is created and dropped in the same transaction\n or " - "create_command ilike 'create table %')\n -- Redshift creates temp tables with " - "the following names: volt_tt_%. We need to filter them out.\n and query_text not " - "ilike 'CREATE TEMP TABLE volt_tt_%'\n and create_command not like 'CREATE TEMP " - "TABLE volt_tt_'\n -- We need to filter out our query and it was not possible " - "earlier when we did not have any comment in the query\n and query_text not ilike " - "'%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl" - "-statementtext%'\n\n )\n where\n rn = 1\n " + "\\\n-- DataHub Redshift Source temp table DDL query\nselect\n *\nfrom (\n select\n session_id,\n transaction_id,\n start_time,\n userid,\n REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:[\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n query_text,\n row_number() over (\n partition by session_id, TRIM(query_text)\n order by start_time desc\n ) rn\n from (\n select\n pid as session_id,\n xid as transaction_id,\n starttime as start_time,\n type,\n query_text,\n userid\n from (\n select\n starttime,\n pid,\n xid,\n type,\n userid,\n LISTAGG(case\n when LEN(RTRIM(text)) = 0 then text\n else RTRIM(text)\n end,\n '') within group (\n 
order by sequence\n ) as query_text\n from\n SVL_STATEMENTTEXT\n where\n type in ('DDL', 'QUERY')\n AND starttime >= '2024-01-01 12:00:00'\n AND starttime < '2024-01-10 12:00:00'\n AND sequence < 290\n group by\n starttime,\n pid,\n xid,\n type,\n userid\n order by\n starttime,\n pid,\n xid,\n type,\n userid\n asc\n )\n where\n type in ('DDL', 'QUERY')\n )\n where\n (create_command ilike 'create temp table %'\n or create_command ilike 'create temporary table %'\n -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction\n or create_command ilike 'create table %')\n -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out.\n and query_text not ilike 'CREATE TEMP TABLE volt_tt_%'\n and create_command not like 'CREATE TEMP TABLE volt_tt_'\n -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query\n and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%'\n\n)\nwhere\n rn = 1\n " ): mock_temp_table_cursor, "select * from test_collapse_temp_lineage": mock_stl_insert_table_cursor, } From e462be6641fe310e9800eb107fdb58e87b0f1992 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Fri, 11 Oct 2024 17:21:55 +0530 Subject: [PATCH 43/50] feat: Dremio Source Ingestion --- .../app/ingest/source/builder/constants.ts | 4 + .../app/ingest/source/builder/sources.json | 8 + datahub-web-react/src/images/dremiologo.png | Bin 0 -> 18216 bytes docs/cli.md | 1 + .../docs/sources/dremio/dremio_recipe.yml | 25 + metadata-ingestion/setup.py | 3 + .../ingestion/source/dremio/__init__.py | 0 .../ingestion/source/dremio/dremio_api.py | 721 ++++++++++++++++++ .../ingestion/source/dremio/dremio_aspects.py | 561 ++++++++++++++ .../ingestion/source/dremio/dremio_config.py | 197 +++++ .../dremio/dremio_datahub_source_mapping.py | 97 +++ 
.../source/dremio/dremio_entities.py | 452 +++++++++++ .../source/dremio/dremio_profiling.py | 189 +++++ .../ingestion/source/dremio/dremio_source.py | 608 +++++++++++++++ .../source/dremio/dremio_sql_queries.py | 334 ++++++++ .../bootstrap_mcps/data-platforms.yaml | 10 + 16 files changed, 3210 insertions(+) create mode 100644 datahub-web-react/src/images/dremiologo.png create mode 100644 metadata-ingestion/docs/sources/dremio/dremio_recipe.yml create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_profiling.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts index b67ca388c1054..8480b5a9a86e7 100644 --- a/datahub-web-react/src/app/ingest/source/builder/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts @@ -19,6 +19,7 @@ import clickhouseLogo from '../../../../images/clickhouselogo.png'; import cockroachdbLogo from '../../../../images/cockroachdblogo.png'; import trinoLogo from '../../../../images/trinologo.png'; import dbtLogo from '../../../../images/dbtlogo.png'; +import dremioLogo from '../../../../images/dremiologo.png'; import druidLogo from '../../../../images/druidlogo.png'; import 
elasticsearchLogo from '../../../../images/elasticsearchlogo.png'; import feastLogo from '../../../../images/feastlogo.png'; @@ -51,6 +52,8 @@ export const COCKROACHDB = 'cockroachdb'; export const COCKROACHDB_URN = `urn:li:dataPlatform:${COCKROACHDB}`; export const DBT = 'dbt'; export const DBT_URN = `urn:li:dataPlatform:${DBT}`; +export const DREMIO = 'dremio'; +export const DREMIO_URN = `urn:li:dataPlatform:${DREMIO}`; export const DRUID = 'druid'; export const DRUID_URN = `urn:li:dataPlatform:${DRUID}`; export const DYNAMODB = 'dynamodb'; @@ -133,6 +136,7 @@ export const PLATFORM_URN_TO_LOGO = { [CLICKHOUSE_URN]: clickhouseLogo, [COCKROACHDB_URN]: cockroachdbLogo, [DBT_URN]: dbtLogo, + [DREMIO_URN]: dremioLogo, [DRUID_URN]: druidLogo, [DYNAMODB_URN]: dynamodbLogo, [ELASTICSEARCH_URN]: elasticsearchLogo, diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index bb1c1a10ea6e5..c028d104149e0 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -302,5 +302,13 @@ "description": "Configure a custom recipe using YAML.", "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", "recipe": "source:\n type: \n config:\n # Source-type specifics config\n " + }, + { + "urn": "urn:li:dataPlatform:dremio", + "name": "dremio", + "displayName": "Dremio", + "description": "Import Tables, Databases, Schemas, and statistics from Dremio.", + "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", + "recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n # Credentials\n authentication_method: password\n username: null\n password: null\n stateful_ingestion:\n enabled: true" } ] diff --git a/datahub-web-react/src/images/dremiologo.png b/datahub-web-react/src/images/dremiologo.png new file mode 100644 index 
0000000000000000000000000000000000000000..95503c049c02682e7f7944144dd887547ee7954c GIT binary patch literal 18216 zcmWh!V{l|m6pc3CXrqm7dt=+)m>b)+ZQHhO+qN^Y`OWuZ>QznE>(_m|FZ!Hw!{lYf ze!^nGf`EYhln@tI1OWlX{Qn0H2K)#5(t860MD$uhSU}k|^D@gtOG)i}#O5hGFv>EH zFgL&Td{9C2S*NP1S#@Jw*8_0R|g-*`@Z>{rsg9&zrir8Aw7x z=y(4$fRX9pa0PRm;4uB{3H4h<0U89v5z#;n5d`G)CkP=Zh%5*+42Uflzc`5RFAza6 zkW7%a5-1Q5SC&^7*y&83lmxGi{QCNaIB^mLyu?PMv3!9siC}av1|M*c<6MgL1c!@| z9B*1$+6G6&C{Yi$0l0(VC{W-ywT+F<)@ye&d&DD_o!=KSx`QJlenq$-H}oymN86*5 z+-~R6Qc@NshWvWH;00p0uAQKDjC;s&k`R>D)m-im>dRxlg0Uqh#{FVJd_7Jb+Y$NR zF87NjhNFa0%myati=(iGNRSH60YT-#1Aj%tM_{8FjZpU{f1@t%Wb|<=Lj^*dE;KeZ zeLqatv{-LhTU(!57S5enEOY;%_(MrTQL9MPp}L^FOt;W4e)5yTpMSQ;gD6iPY`!bzOe3)qpb*^wNk4hPcp(H##xLkp*-|x;52GF_;IX|CU|=8x!5&1;FD?*^ z@yEue8JkCuB?=`H$Yo@)@$oUlNRZ`{msLZJotQ|i5r6(k3&t;1Hg{F0Ktjp2@ykEW z=6Uz|cvXw6OP@OO{o?!Vsg4`H&ZhF@HEB(#O0s&tA-_cuTx7(MdLAiZ1QoQhpEK; z_WpmG_Kg~Vq6xLHmxJr|cFqH5$Ou-~%hab?Bp&}$Wv`DQcK4N@owo z6UmcltXTpD0!WKg$&97aNvdl^DX^{RL5muS>Rz{%mEFw^#%J+E!&G3vRS44QcE26S z^%$VrBS8$24fcEQ{8A&mMEcwm*Xf8Xuy0T7zlq+MBQrZ&J7xC)pA1Ue0fu4 zSfL6fLTG7{_(=7 z93K`e)W{Jd*+15cm9MEwOIdkv1(8~dx}pbA7)~#zhs&i-%IUHNqNF{$iRS3}d}a*l zY@vWh@5Y*m*~a^3uEf3#Y=Q3X-s9`lB%}nM@q z*H%Hr9)!^TsWi)-uMQ_uX=4Tq@aRGDY-d5^!B~{e-ArO7ipg2r>xnKCt#+fk0`#4= zbPNQ!6Da||VRl(5RLZS3T(&#Yw3U@3+elDhQ~2sFZ*mJ`bNPj&611H18=D)o&824c z$M5n7-dW<`;rOVM+V|&-;f`kj-1bcfNGQEr6!bN#A5$)FZZ4Dbq=g6@tzX~p&Q=4$ zjvaNhU9~86R&^@Q=js5b1aT71tS5^>VcXAOyi&=~$bZ zI-CJKPUn&9EJo1&xwWA;M-wSm=BC2m7<%Ljrt~VT+oe{`#(07XjZL+=mx~Hf58V+| zNs?LYF3aPpns=63CKBcbWh@md5Nl(oO4pSOxb$2 zriRgy#RjH`gOdK6wTNMW-GOiF7k=k=Ge3c?WiK#J&j^+*d^R2BWOY3Ce!~d>0;lz8 zw405tE`Mh^0|@Y1w2R73Q^pCKqduj|sU2EA{nDMUuCJfhO7Jx}Et`J)ZxMoovG0bn zi+c>ZJ)JBTr#ogjp0R(7T$+a(S%M*sn1fCf;V94$7oPh&6AZK*<6bRW(1lgcU?eNXVdNPt$elj9%+)zh_oOi5tvyP&QAKEyqz_FPmu7^VmT!Ir(E}b6Or?*Kp|VHY40L zIe-Bjp|P>CGrxY^7acJ}nNbQP45QYg4~Em|{yJ3M@Rd@7@7 zVrpWf3u}q*ZHiszCjyI|rI7rCyuPulP{w0+(H&cqDkTa8_CE#{4~yv>z6%wyZ#Y!g 
zqY8Sys0iHM$QJ(}&|?B5C})5T;7NULak!V1n$UVPfrp33yzf0-)ED~o>sP1C_H{2%+reW>0T+sO8yEoz*XTr#!|nbNHaipJ7|SM1#uOfp`QT<-Lje)7yE(iw=)lbr zZpYVJvy$O)mL9aOm+f?=6SZ2bw3kqkIbiyON$T}CEw5Awwe-${M(E>+lvKved<1>$ z7~KZ~m|(rAkm12p#@f`>)X`BAa5=}rYk0yGsN4+h%6fXetBi;*@9&Qfwzj1;HXPY@ zcdo-Q2aW4l46mNfMiAtj=6$JiW{jU)8%0=vs^I0xb@a4lgK$v?y{u|R_KrW_Y{S(^ z=;)j#Pm>|VGEH+~8~VPnwy|S}VllXU9#4V_jLX_PbkM>?L1X##e^ALH`iDv_B*6LJABgKU z(Fgd&pUq57rTH8S%gZ^r$i=FL|Awds^6S05)PH=m^?g#%JawVLC5y>T7`F3AGY<;~=L*E8vzRBM z#NczdEZh|p_amU@f0_s+yot@o$d;#2GE16Mk**fPX%pF)w|3ZslvklGbia)yE#PJL z@7dybQ9pqOLO6++?mhE;^Ew>kkmXddLJP~vGt}sE(9>jMpor!xYO}DRil_jpXkG3# zBd*4y6f6D`F%fyL)sr5y1Mc@h{{CId=y|oVqWvQHYx%TZvLY>mZS(RN5SieFYaoRH zi-m;g-qx;QWB3t^{P-uW=ywQXv;#ckL7S7DTsF(x-18o^mxj38>uX314m}-RS!wCz z-9MBhXF29PWSThqIO%^-5NmXJ5PnCJb+R=<$UG3p;=(8iea5qwY~~kkP|4I8$&|3LpS9-z*8C-XRn@nfd)awPnL_p`TIt-u24Fr8hVEGfA zu2`(tsIKrM2^TQ^(CT=0jV})mcOYlv%J;b6C=8RU)FMO)yY#`|%BP2g5r|bPnfdyB z=4*0#)ZUgZX7>CQ@A)=zH$dFs7||H0zFfJiw*U#<2C7`%#L~7bfLyre?5usPV(d^* z!0Y*qD+G#AiSHh4#@h?(0alb09Mqq}k`6Kv}(;i-KDcxg$Xe+FY@^0I2T*<0PS z@3b`Tsvp8T1J0OBYSgULxYgM@K32x?2s;zS3k3_SozsiG&ZgQaFPb7}myLb7xKV zMd}5g&xrwo#dhjoR6l&&%`D3zJr8-C9(U~V{RasYFL;p}KGe&W{ z=;Gtj94vIfm0#4*Qcvw3@f6_F|7dlZsot}1i|!^TpM4K9EG5ScVB2 zdXmH1&*2C?TX%0FDQ)C z5qC7AA=FD~`J6g=-yw^$W=?2zn~k0-e*@P~zK{`=Xt%%E{RtI8`(5*kGO?GitiKdf zr?lyH1hYtzOhu2j3Q}A+zNR+E&BM?FE!-f=&j>|i6t0&f!xqlZAC@%v+R(6()iE4rF~;z$3A1x?!!|eRVag(1I5Y` zMd8gJT%tMzOQT1NsmK1EcMBs?3DlUbFL_WYwAz;^CxJ(H&u3YJ=H}*KGsQO6>Y?_G znfEW`B@jjMCeVS+E)fLc4jxTOJ?g#`o1F@I@#CmuyeSZ&1Sk+2t<<)uPvQ0Z#P3lK zO-}XZN8zLq80)KY4i5Jo_?}ww@}_5yY$+YqTg<;kqS<;Nh)0jgha*&?IN?rUDZ-n> zFCJ>6B~EPDc#qXC8mtquIR8-$+D!VtI^<|{jSsi%Lb_+f9@@XM?|eOf1WBQ15)(27 zAV2j&2q;HtqcG(zQw-Msn^=JXCHjyyrE1d3i|-H37A9dnwZux&-{Re)(W`2HIGh{S zIMd`O5q{26g<3Q=*Komyi$~1=bNd1halTqNqzv@K#l&H8 zY_+iji@V$nS@pDL3>GzpX~nCXWPlWzYxeMX*g78GFIlP1X7(VvN^fp2KAr&vXOAoY zq-^knU$0|hx*004?d)Nc${1AGcj>my@vWwfl693Ly^q&gTdYT0be#1wn+SibFTttt zb&+u_bJ{k4K0p!@Lf1Q8Jn^+yHaP}Ru<;u7D|7DjNlRswvI1X*c}9s7N4X8q2K+u_ 
zpa@sh;n4Ov!zt!rVAze>@n-rm2d2}H9L;7`wH5XAaB(6LnN0p@_!VO4F!7InSC$7A z<~*LQwNyQ8sxVGq1MY2-{f6(RXm=l3J`>or#{Cqi;}Clq7GhpgV=7om<(_al;Bv<3X z+ol&TSgG0ZslD&MKA8z3C~Z}-Z*0}DY$9oO0*1HSIB+Xc4zj2}4JOrY=P)tr$Y}>j zg7c;<-UF(|$~&#XI(%GAxf?&i^EI;8_2$S>Bv6^A^jGrm1QeB2T=i1g zSl3X5LQ1M#jm#{+Pp6aVr!#3SK3!^F6xR!-RHBRl%h7FB0ll`5NpQew|J?cVt3kIX z)?}HN)$tT5gv$kco4l~Ox%p(JM=RunOe}v+P%H-V_f_76=}cZxX|CS2b7{r5J6+EA z$Nj}{Bu=#1_tAM%Wx9`cr^tt1eZerc(AsCe(7Oh#{S-YZIeFL9IAk2O#jj`SPHAy2 z=c~1DLyYPSxzp6J{8aU1-4)h65Zr%tnn7nCG}=Y-W)sP@**q4I5q<~Hj@6BpV$RWf zCJ0ia1{|kUdFU-WJhc<8>vrToKh$lq4%Y@`_!K^0yon>tmDQKZVE1uAjfZ@5b3~|Oae>MZ$1f)FGMUh5<5wjZ`Z(Ub@<>{=B7Fw0NXHZJ{=z>XMa&j`O z9G^+{d@?^tuv{uuJZF8#@KQx?-VLhm@~+=)vh{5LP4woxJtJF&zanjdGlalwDLiP) zg)35*sFJHKd$r%jirT}l1wA58>h3Jp4K%ll=z7C*udZe@6)KsD0|XEkv6B}+H!U7L z$h7N>=0N64oxnoBlp@@_KFc^P*Nm!a!CFt`%iNF>92pQvEsLPR-j&5#ES; zsHXd;#l^6hx>tl>Dtz=ZxLO6oF9;E^r< zBdFnkhVGC6x?N){RbgGE8V$lPW4*j;M3vCptM4nzYRF}ZK2E%lp7))DaV~;9UzXci zl#0IQWw$_%nz~Mc)>S_)4>1zl`G*PP>`eZNewDIaduR1#FtaLA+@SHh_)(Q)Q{>QY z8oqQ3I!1ENpK9|V7_l4<{mp$5IHE2RUCR{LeVO~jZ~adg)_$;uR~%_2V_SXd^XeNNz@eoek*ih zAfn7yZ+W;}uf-@J#|ef>XP`4s40UYUa^rBO4Hl+;tXyyr-PmWWYa}^$WY(uX9}@*h z3Kq@&6e|T7ld7c5NLAPwTnY*Lt8uq`_PdfB#oSPEi->PDUw=Hd@G(XS!y4a=m!t_^ z+oIeOm6GJ&VkkukcfZTFDIbV88wE&FM~o3Cl$lrgOPsJ^9Bys({Cb{_BM#|lx^3EK zL3WzI7o5L%q|Q+{yy7Ac4UNv}Njxna4+Jq>bWiYYTR3t;s3`q~m6W$)ZvB1VQCN@j z9Q>1>_j`A7Y|ArzWPSi&sZ9B4>zOaTA`}B_`#bM1zX+Wm9I<&nC7n+v&p|fs;7xR9MU4wv?xEZDI{*#BeaJ>Bdy8By!j{T97 z5Q&IfhPf1404YfWl9(Y#x@bYY*XYENYKRH?jh&o0Leh^Q^P^H*F(=`#`#$tlQE2Fm z9IyM#NJ$N5xI4c%DQQtn*DboJh+{W__qJkkP7mJl7^)Rk71o=sIYgV{me~3R z8!OKXZRt=Bb`l!>K1HT1(1ABqJ03E6%E;59`hUoL(34WK5CU_TZ_9ao-OtI+PpE1l z3XLDqq5#&+UIrn7X~bb)YG5%8igU#oKT4X+rcy0p!{czPkHWpQ7Rx`^o11N@ zIoBG@Z`Ni}9R)3;Fk@h^^JD%Ec$#+DRSvsF4`Xq3eYn+imyek865880 z_Xm7D`MBfVnIQRsqb>AT5vxo3`oL>5ItY3ESf_k~I$4xx1?vRuc8Tq4$vvQDr^d#V z!J1H^?Djr6wc}fUlm!J@Op1jnLEwTkSlQ0_uSeRDyG$?x$GyEqHKahEAt`GSz*)W5 ziR#jNIYQB-4Q!?RwWry#%V~q;89!-2Myox3cxWt1fB%S^#zU7=!7Cb0x!F=Wx48fU|S8jK-}^Wq#I(DCJlF*3Z= 
zQWog&s>(*b`~(b^r4tuLsV72%KbFt9*^Ejk*+(|{Sj$_4Be^1_y07VV+um}fcJ#1_ z7mDrt{k3m7==+P^g85ImQEpg(Xic>P@K&QcBYyDlyR4|_v4`)&a$cAqFw(Wxgg-Ni zhX6W%ezcL3)xQ^&DN5E*lWO6@37RNBvTC^XMv$1B8A+-lHU&p6q-j;%(a$%B$w(gY z=eUyXU1XtRnXboY)tmb-X6@6{TjT@s4ev#YWY!5(09KV`$qLmHQ_%P_7)iF=Y;Byp zT5!UyKJg|i&im-r6V&jq9swm~+(6xuLC+)FiG_dMVj5qf{luS~tV75D3 zF;!_raNUL$B7m3dQ?N(ch)O~Uo9$47qC5jnHlL`f5_u1S| zkPDbx4&4Aq3RDqe$$R@J3jh2<9RrXMq6NId+n1w~n2Z#1Sn1z*yw>qh>u*v_2&_l? z7pj=4jGuYDDD5V=CB_e^RtESEl0P{B1af7SN|xh`$PW5^y7u&4_abtVfu^{AL`c zL)+3cMG-$$UCM1|%yRjozPbBfHT}BhUM=c_m~S zDynrBWVUYoHH3x$OQDS)y)F^Ti#5Z6lPChE6VcGjYWbvhz}6-olL({6(>~r7aHEq| zUpL;KbboAOkJ3yf+6nwIapnmco^Kw7BI=0St!@MvWfpN=*4L0{=ugCSii5bx{9r8) zQP$>N_I3P72gfERDmt0N}X!|%ekVAK7HCo1AU`t@-m z-lV!lRn4v%mj^4JQ3xvF9!A@d;&=Lpb|Po|{S}~11%#Yt^9F)MhUlyMVB(+tLHDO!Jh)eB5SwsnKw<9^p-x4KoOv8YttPe%laP?|ayY?) z4fK3Ig$R3oCkhtK;qkb8@=NE?1p``RNl?a4bglbYhPk;z#tkBhWGJ%3;Y(Ry2}lq@ zMPi3LGmASllTn-tI)T99(XoQay^>3lrMt@(C3tEi9(!g)yIg&w*F43VZy?ok@l31y@J zAPcy?m}_Y!2#_uXeoB?Q+u*FF;tk>(Vbr6{t(j{4hSp)1N0Ef@Qm*I zE2{(v3Z)U%6va$uYJY;yk+14Og<21N3A(5|EH-C`i-1{E^ zYCPI@WCcyo9qINz1o8K{uDj7pSRI~$a0eQbW#9cvt2zea)I#06;s1qJMrZc)>GzH0 zB#$&?ia3b`npGAgCuh=_!QH#K2n>-Qfx$f3nzSx@fK&-mo{xzhm>)lxvn4U1xJ`Pv z0^9&flx*elDC^t5BFW-8r))fiAYNcHYfMQRJ-5KJHZ~(U)+duCV5GD=Pn0MSjoww? zI^U$F;k17#b@MPkXHbhAkEoRs7mLBeh>-cH$uH&}+=XLk`Hhk~Yes{)Oh_R~rr_Q} zj~*oqzhC>Y8tJ|1@eTW%#o#%)#vLNg!b!17wtTL+jK#jHTBbLy^cRUCnK%|Usmc-H zsA&9Hbdxi|c@H0n!Tn*hG3=K?K5<9q-)d#A&(9bv)@4;>40ZyVMqPHXeZVSVA&}gu zVM{Ay$*ce44TB!ge&q%FyiFM@K{)b$ci{Eo)727XZmffsDQ>i6;l(i`jmikCO1R2O z!91NbzP=+U37DZ&RDK>Azg5%N3&v3}F%kXw!{~H6E6(hY>9owl-_?YOlSI@vWmX3n+^*!A)-gKrG(sUsHfmVUtDRWPH(DV zh28BMM9GceMUq8=B`Zo4sE`4DC$()jvX0U|$T=sebtk=X@LFiE&_N3VrELH~a|?@} zIJ0=t#Nh)E%XEa2ffA(y&(|>Tevs&sWz&46(LSOay}0`dnQU&4uZ=0)10DC

Oas zxl5$^Xxt74+Ps31IErUgYj^dMxpE*TA2ohyTx`Xe#zM2oF4+qhx#)1R)LdWp`M8#A zO#uV@PzurC3VI`BgOMm%FwW$*+7v95_j()2qo7J=wP=(SB2MZ@vi0&1TZ4v7V<8b| zAsVbHkhj|rDOl+G+zv5d>@KUNU;J!A&IoL@c-Bcfn-c(HHK=4tR5k!*1+gNABt=>i zw>A^@7*SkoJfwqxivtuUBh%N*i7AmEE_OAdA|kvj0D%_7aiKa1akZK>RIEJvRskU~ z(?+TyEDwf~IjSx-@z2&ucI5#6jn^fhFFudTN^^dJrW)s@2TJs}V^a%%-Nb6z? zLSv)MPQ&xRBXB>-;c>Vji ziu-#ZDAtj4tD5JXA<7$^HXB!=lUBloA>}@j)=$gCbkHBm1fzZrVDlV`efVA4Rkq>M2!ML@2eQXpW=AuoCcL?DFmNF zEgTuMSiTeZ-_#ib_+8=YQ

yWFWZytW9D`14@j}Lb z*y}(>@e&0_m`>`nzV6R&x~O>c-m%M0C5E2AaVb~XmNCpUqrQ*tT2PUA<73$ z6!B(wJ&B0S%RpE@(Dy(VjgVr!T$z1b#h^*Ge2&V=9U_QK6f!Io4qXaS=+6i$<5 zNij&#q@F4p1LMk%Dt;WTVx`i;#;VDFW;AX9U$d?=M(z6=CBi4m>m5N)<3)d4xh-9o zU@{s`>c(LNy;`EW{qIWr&So^S9QkL^CWKGShP3rY*Y&fmPfPZr86bV>{&?CRI2kn0 z|M?%CR@brCAZ$xrBy~hV-(0^h|GhqM0WA!&P=DmcLK#JRrR1gw^Fml^m-w(iyFI^Z z>@aW^ww9~SruSVxN9t#YA3u2OEyoVW%&c)zFaqcc7fhxpQ6&%1H!R|xIUM%;q1x{* z@I~gR!ajDl#+{p*0_(F1+tufv6jfC|k29`WSXngMLIeqGeD>M|#_+VErV4s7;@v3K z1rjj|#Z44Pl?sHkN{E_deTs_9T2Fpcf-qgXEHkk>7 zJ8A7b5`!CHH_w1eA97+U0;kVGLOy&TT!V`w@UeV7IZv`cJ6e{MMG6}{nn0)5u1|%R z-MEAj`~S%HH!=iH%Yp9-L%!S`lM1&Ip@j)oZTYn1E7G`J9$8ZTnQjSz+f`jz>iO;? zKPoC(wjquax9NIaKUE;!Vy)hWjEXot_Pap7qNdPul~8-p|smpQ+n|g zDx?g|1E4QWt7OGyqse7s*~%SP_7>VOJ|O`}7y@{+J3BJ;f=JLK7)4%EiTF&GQ0+LC z3Iv0n9lFWw8w8z7Ys=Tm6*ka4|MEP|O?Egum*{#^kM=AaW&IkC(d=?3l~-WzgQa9= z$6wgTW}l5Zs6zvTkcol8#UZhGpK-|;dWWTQoThNRn7Q%;V`F{%359^O-ZPW_tHrN$ zG!%_GIp85$7L>Y>>t7n3a2<>_PK*o^OyF9bzAU=~wbNU{(9)~S))crU|7h-O>k4^k zd$aY$GN42*=d50lCr(_e+3}WmBE?nYFhb22O;JRo=(Yw}rR^Pj*nCV}7r zX!Pv=a$3+>NVSV~N5%ujF(MWTg2(O2)BP>4!==6Y9awfH(`tXrHd1_RxI?QlZN>9RnO~%rIu}q3$6KL_R_c1hpHdE8`qj=A#H>2T0oV z@MCgK|EW%Ae?m1>@6;Z76UDvU2q~#mxo1w@5!9^dgoVXQN=nc4z(6#cK#zq;WH1se zI<=v@I+ml>9Ci)ZnoY~OWF}>N1K4-e)C_tqY2sp$zBd~z`h*YhqyqzQ>kXj;g%Lv| z;Ex$F;3FN&E>1FEZYq{!(wu0;)q)MN?F_bW-iC`O)T(v5-9Me#*jUp^^Ye*Jqls3ZpR zN)N&E((1J~hvzEImJF;6OHY^}!N0FB1xm%z`*vMTp?+!(y5g`wibQc(x&;|3f1R1Z zJza&sw%l=WM)jwo5WiU~U}VK>0S_)kX~B*fGHwDi1Rg`}5Lb-a}*mI3(@&07lbmD9iK4U|fl zJogH+FsN<)h@*4{p00L8-U3o9yt$ZyZZy<&?F{}}?e1Njg%8_3+9Fk0LIg;`B?wZ| z(y1JFoi(Eeo1wr2FYKu1uV@$uzkYa{{hUaO1f7uYkBbyv^os;SfL^ELYhtEnX=QnR z!$e+wcX&lNEbP`_Sk@~!o6m+dSb&UHBGO(nSpNbVvk`a(`lkbb9tU6Fm8U-lh16)z zSeDncxWF5ZM0x|sWGF;D2(S?D%y&V5{Z=tC`xpT~UTcKl280ps8Q-t|ce0L7|9^|O?IL4p0;JhQAoAY>8)+!l!|b4@MS5$U!lwx3lgEoIE%PPHRW5&>f0G4&L%j+SsC2=JrDzR z!kX!pQBnYPaAG&Euiz=q6a2s3gKOXru5{>L0QuE4x9v%ZhhKsB9yHPem=Gck_g9;( zX-+r0gh^o70m1~%`c7uu88zk>eJrA)qJzT-wol@q*-|qcRVW0e!adnFl~l=+F*zJr 
z-Z%0EM@q^UZT}P?$~yd%r0s9>f5>7H_i~;7CM1P=X(eZ6qcg8!`<$KI+}y0J#EEK; z7*J)J+TZ^rvg_}~W$d(+`ZNT0BA3Z9EfMg;73w)?Ft=mIJsE4BIjsAE!T$9vD#z!o zDGy;s*$N>||8{0R)n#LI7^q)+TQ!Vvv`GcIKIo1O6ttWsP%j~EBM9RS5w+{l)HV}2 zy4v!%zrS8wS;IgTHr^YFYDq#*$>9t)BSC_$z|$3Y@~unrZxDgr6;(tN_HJAjcVXQ4m>v=O#^Jz~hoJxlv5A&@m2a>O7Yk{^$Ydt7#}n{YRcSexZGI@2XREmWa(Gyn zM=2yuGQZ|EQL9MFm!QuxNWtoK3Y7Ae zEby?yL+s*72chCwe^j)cNV$kVxfJ`93zn_7TaBzSpQL91Dn2<~ZSNjJileYt{SDBi ze5N|#vyIW>LI+3iJRIlx-3S!WL8uFRd-+kq7SlQfs8jkhXvZXm%0K?da^3KF?ae3m zG_Pk|x}44?0xvc(xJUsWDg|>ugr&Z#g?-!A=`wZB`6tM11S2v^nDFHaePMs;W@>AB zKRMQSf(ZsU7HjDSuYg9@)wPea(Z7Fa7;Cp-?EFN?V@4=37IQMqt*wK}pJ-{)z&tSh zVrQ6NP;iM0o{JH{6EKok7O$NAM?{?X?bV6gr(C&ul*7*aq?!j%RmIjF_zN}P((F9a z+E@SZeCa|(Wr@D}yXUY_`2Jl}qhl2bB`gqVw<@hyQ{S~QRuEq_lsKdMeV6bbnbIWi z2)3R9jqqdjRs-c-IQT!ywAFm0wyd~Au-UCuKpU)jr$M<1LW8uJDipP2v zMveyu^E9h0=8FeKU07RNdwHpu)nkq(%aMSKLXp3o{Mw6&Dq?fI$?Nns3AFu#v->l{ z=f@ubLZG~3wm?o*n|DQ3%XAZfp=;&tHt;6Cpj;dl2980W?sGKW^cD=+g@^w70s#dj zA}03w7Csz>dvLIjz`6pGjR?xhDvisPeLIwraji>+BG!H5&JOr_e?YwddFj9^sNlHZ z!iOipcewiNosWf!z%6vo1R@yj$=ZgRlBJu_nSXL6qXz<#-M69FJD!MGy1PUH!i?Uf z6S@%8V+!7cUdvm5pn|R3^mLXm0wY6sP9c>>;qsFPP8fyKdYVYTQKIvFt3My8A%b-h z1$6)c5fj(f&r7{FvkM{)y5K}+GWE<@SO$uNjNhx}cwcGw=zAtIK0iOZKR$r=b=S2@ z|Jt$9V@msXCrp%v!*mBaIU$$H>9Cs{z>Ejt3rbCb6_spczG&W-5p#+=vx7w1$jT@r z;dXS7t@C;D%(qvKm%?V(4|U!1xk1lILQdYt;K@wA@j~_Qj8BseZB`Uk)YCIxGv*F@Ecc+~+GMR= z0s~rc4BkX(SzQM@DRA9l;)H~&3-MQ$39gP7=zI_&-$07>4J}icJBtA`5=c6Bf4xlr z9}Gkie2(gtr~<;p%>KcD;p9mbDB==|)8Ih_tc|uC_q;$rmU~;iAt6!&cUX7EmNJ4= zHo!prLz7Q{93tQA2cITTur)7aB>ZBM;A)v)GBNhD-5CX_J8iJc8gH$o6S7*E2Va=5`} z$SEnDvmLYbd+$K3Xo|6^`8Tkf-1feIOk>7tcKioBa03)PL}>yQMFn=Pz+qTGGA7)^fEX%av{zD8Ks*vJ@Wjht3g&qpH%8|1Z4_meD2WV7oZ%7KBm`}3`WP_?2g`;SL= z-K}@9HBk^2117b%R4uqareK_1gjOC!bB`?($Sh2EEU;B3@RqDi&G?xKQlM%M(4aJ~zQ4Qc`MiQUNY2$Kenr>%UD=EDrYHNaqQc^kZu9`^lvw@O{A!)L*i3o_ z$8XUpf`xQbNCElHLWclIQlF2xckQQQ#WJ82smm_H#?xRhaLoB<*)L~;r_Dd?HE>X* zAiXHdfh^O~;^uZ4^-~D+Gyj6Zyz&e4!B!AYyIk19g|!1``x!o_JMe)mV~)+Szb8i} 
z5RoMi{#BZ!9TCyd(K%OAgY>4ydU|>)D%E+rBw^6t<)tgpt6_bIzbEBzT2dseIrYvL zPwk|;y8VLwUsK~;Aj`3IIa{_oErZNlf}Hig+lM$taP;MYe&GtT#s_#rsP z5*8qxzr5eU#=GlmSRwgbgRN=XiQbq7-5#hVt*SZ z7j^UySqWQS6R`mC`kis7Ad?;F)T_|a(v-Az4r^Q1&e&6i>z}*&HAk|LC81y^A&c-7 z`@GvXAjjQ?o)AXp`Ej`V7tYjDlbPQ!tbVEc@*S8`Em7uM>kR$Dz!{^KQl{6hhSPP_ z|I#Z7A~m|68B7+@cdsj}D|=oiR4D>Bd~831)quTh9$8o3^58*Z;aOb_Mi5W(6&a)0 zes$QpESB|J3KWT9zeYa0oI30ZfQtO_4zxt3&1Aer2K!y>p@IN|WSUHux5G2{g7c4` zKci;2-?@_{m(CoI+2Fy#Q15RnHwXE*1WFXkH|oZ^e>;{eO@KVGTFS6;0y{77hsz^O ztk`#0DHp%HyiYW&COq)FM+^)R6`M3@i>XnPmTeYS^W;8r7E~YvVjHnaNTijO0~hci zPKj5kNO*H{4F1{AQld4jTvDu-&z{YwF51vrrM9`#23XYXQw5Eo4w1oY)Ny)ll5pxc zeO(pwSDdFI;a`WFfWCpPZtkd-R#ka;E-9&-`~lOyIj~s!a$ccY88v2An`Ymh4vw0f z1qy)lW@mi`C{aaSA}IfoU!+_EiQ8dXU|-K{A5Wq6Vm9c5@g>I=A_77><0qkIx-Kg1 zA9uZ3L%FqgJ*PbdTjdHQjg=oohFMR`iO{`xStX`cn4=200hy>XP7F`-~FhFj1Ay-XD*J5)hDK4k8lB zfZO9?H9JeK|GV7yF3a}pmelCQEo`Ut$s_Tx7x>!AMIlakI7~?+B12OXgA*iWft0VK ztF8-dLL0P%lkt(;2f=G2O9oRgIEbq4=4R)=sf4eu)ek}>h$hrS*5j@7?b6IhRYLkd z$q0M6Jcu}$e3qxN*_f!gYJ07*9F|WN^ftX+%HD67#o?}qL{$}IulMAnI@vHxX&;;I zChi9EzT2WL2>ig7tNww3$qoIe<10Vy?Al(XW|p0;79nEf10CEy_O2uh%b=M8E<7;2 zuGdw$1eT@qJ))gfKBtL1e|x30{$@8EnMlQTf`Wd@Rl9Ej0>B<*FKsoNgsrT$8ZLBV z_m19;!#xv{YwVk#%Ku=M72Y~KFCE=eMDkI;K?mh{z3Ho~mu5ggo>p-Ph$BH9o$UPW z*;B!!r$?lxA+j5T1?$6=b%ZRh(W&J28JCPF08V}>C1-eKsNG83S0~1ScLxUM7Kj*) zbdi+Nv`~N5WQbc-P)!@D%QZc&7EJhsgG>*Z{Tk~3aSsMo%j*gWY>cJl^XUJJSdYNY zk!Y5@xRDAa8lzFGSM31_e##ZQ?RJ7&;wW6Ch>yv`#C|PnK^-ksT3H;>4 zFfk*is9mI1!2kd{Kpk2Pcb7A3C>Iz9?#X2wXPl(*OpQ-jc{$dzd5?nOz=(|f{sB#% zxy{5EpeME7G{^hS&#jRgWEVBl5`5SG&;RQehi-m|^*f(9si4bp81N1wL<)``F8 zyDDAeSW`cGY~l>^$G|$CU#C)L(_EIXV54HBNSTWv5`0g@ilC{xo7eISOp3t zSLc_4GbcAReHN&+G{0SYAKFOLB?Rg!FEcw_c!u)OXDw`(B8_}U4qjhhfehd2Esi>{ zmZ_``K06RQFsP8{ghL4uB#0Y4B76NxlRmE1r=)Wy%C*_QLz@mzBIkP>Rg_XIT1s-; z_+C7q{mvBOze9>QOPUBc3mGzyU-+>rt;I$BTU1 z)~zE^T4Y)x5`=)fktuBG2{^}3c0e1sEDoQseR@$vd2_OV*G8gMb$!VX1U%6~`6x|E zNlEPo8lYWpndANX0uufU-UT80g+jsg8=3f*QM4)V7M0C$AdPsJ5J@DpxLsjX5UH`T 
zDJd}_Ai(+00`yJXX9cFwXtQU{cxv@377JU`#Q>~PmmEuP%*%0w>jOh*lK&A!(H*>RLzH-fpj{x*#dgg$C>5@fjpL)WQdjSf=CC3gn zk5GtvRaRS_^XE@md3X0SxGBNFHVNPqJVHnvxl<@# zefg<**(C#7nY8laSrk)H-A-Vg*1S=p#c2cp?8Q@rWQYZP_L()7&X*dpZ&Y45)1Cfx zU^@hG86G4g2!c|E4F2fDcY}lQ_e<56&$U-L?rf^SD@Z}5P#o?M0O-WCgakp*u%Rh$ zy#6wiiJvaKv>VADWVJX+!q{xKW4!?Y#(12NAP9A&f#&xAsO`6X3bYhn6c2l_-2 zL{e35-VTbH1Dt{<3JHRs$jFGtS3E+a(abNk6y&y>hK}2UtiDF4RAL(Wd>)^VJzN3+ z9Kb_`1VPZWX;a^P<5k>C`Mb6%Tbu3bt-ZX=yfGm@&e72U048{@kRS*eF(P%uh*a}- z6oxC$owBPdwYc59k%o6f3jjEPM+*sppm*Qi7=c%vvZbiN_Rw@#t8KZPZ{8j|U_iGA z2mnA{AS4orv})xFBGK${2}R3K9k(TB2}RM;lSdG<(^xi(HGcdUXO0!nhw*eF-CN)d zPhGTdfq8pdNujtBXXk@KQeA-?a+{VmK`6j63<3ZS-~~d0An4IYmd3{7EW?`fyNJv~ zQM4`-Ck&QEBF&pS+j#>7^mV*LNE8ZX;evVQ?Txpy70r!SUf7&}yS4C+dHawdgK&$n z0|1WU9YTU2XzZBLxYhg%)Au5H`H(wmn@cT~keN1hin9j@=+k(OkRS+Jv}k^K7&cs7 z(OBPBT4a7RqSqB2*$3k}cMlqrG3bjx?hZqT4w*b@qFV+C=yMR< zP^{nS$mNP>);(WaTW8$J2?|j(HzGQm6`dhoaN`@VvRL@>f&f4_@yZ#A&1TJ=GYiuw ztE;i3B7uPa*ox&qMF7{~!lL3XWne4I!5uoql#X?G(IF4*q@~9IUjrQKV zZ^g&Ox_3~3K8hC$iO=UJCJwZ8_2msOj2J%5y(0qjUA$OG5CnzbrQQz;3jFlr55|oH z(XRma;oU--GI~09=h% z`r2V=Zx@5e2td!$8-xS^^f+KG1OPYzAS3|52>>Ah08Ri12>@^c{~rJV|Npp+J{4nR R=0.6.2"}, "dynamodb": aws_common | classification_lib, # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. 
If its not it throws @@ -592,6 +593,7 @@ "clickhouse-usage", "cockroachdb", "delta-lake", + "dremio", "druid", "elasticsearch", "feast", @@ -690,6 +692,7 @@ "s3 = datahub.ingestion.source.s3:S3Source", "dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource", "dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource", + "dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource", "druid = datahub.ingestion.source.sql.druid:DruidSource", "dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource", "elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource", diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py new file mode 100644 index 0000000000000..32440033843ed --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -0,0 +1,721 @@ +import concurrent.futures +import json +import logging +import warnings +from collections import defaultdict +from enum import Enum +from itertools import product +from time import sleep, time +from typing import Any, Dict, List, Optional, Union +from urllib.parse import quote + +import requests +from urllib3.exceptions import InsecureRequestWarning + +from datahub.ingestion.source.dremio.dremio_config import DremioSourceConfig +from datahub.ingestion.source.dremio.dremio_datahub_source_mapping import ( + DremioToDataHubSourceTypeMapping, +) +from datahub.ingestion.source.dremio.dremio_sql_queries import DremioSQLQueries + +logger = logging.getLogger(__name__) + + +class DremioAPIException(Exception): + pass + + +class DremioEdition(Enum): + CLOUD = "CLOUD" + ENTERPRISE = "ENTERPRISE" + COMMUNITY = "COMMUNITY" + + +class DremioAPIOperations: + 
_retry_count: int = 5 + _timeout: int = 10 + + def __init__(self, connection_args: "DremioSourceConfig") -> None: + self.dremio_to_datahub_source_mapper = DremioToDataHubSourceTypeMapping() + self.allow_schema_pattern: List[str] = connection_args.schema_pattern.allow + self.allow_dataset_pattern: List[str] = connection_args.dataset_pattern.allow + self.deny_schema_pattern: List[str] = connection_args.schema_pattern.deny + self.deny_dataset_pattern: List[str] = connection_args.dataset_pattern.deny + self._password: Optional[str] = connection_args.password + self._max_workers: int = connection_args.max_workers + # Initialize headers after setting credentials + self.headers: Dict[str, str] = {} + + if connection_args.is_dremio_cloud: + self.is_dremio_cloud = True + self._is_PAT = True + self._verify = True + self.edition = DremioEdition.CLOUD + + cloud_region = connection_args.dremio_cloud_region + self.base_url = "https://api.dremio.cloud:443/v0" + if cloud_region != "US": + self.base_url = ( + f"https://api.{cloud_region.lower()}.dremio.cloud:443/v0" + ) + else: + host = connection_args.hostname + port = connection_args.port + tls = connection_args.tls + + self.username: Optional[str] = connection_args.username + + if not host: + raise ValueError( + "Hostname must be provided for on-premises Dremio instances." 
+ ) + + protocol = "https" if tls else "http" + self.base_url = f"{protocol}://{host}:{port}/api/v3" + + self.is_dremio_cloud = False + self._is_PAT = connection_args.authentication_method == "PAT" + + self.set_connection_details(host=host, port=port, tls=tls) + + self._verify = tls and not connection_args.disable_certificate_verification + + if not self._verify: + warnings.simplefilter("ignore", InsecureRequestWarning) + + self.set_credentials() + + if self.test_for_enterprise_edition(): + self.edition = DremioEdition.ENTERPRISE + else: + self.edition = DremioEdition.COMMUNITY + + def set_connection_details(self, host: str, port: int, tls: bool) -> None: + protocol = "https" if tls else "http" + self.dremio_url = f"{protocol}://{host}:{port}" + + def set_credentials(self) -> None: + if self.is_dremio_cloud and self.base_url.endswith("dremio.cloud:443/v0"): + # Cloud instances handle authentication differently, possibly via PAT + return + + for retry in range(1, self._retry_count + 1): + logger.info(f"Dremio login attempt #{retry}") + try: + if self.__get_sticky_headers(): + logger.info("Dremio login successful.") + return + + except Exception as e: + logger.error(f"Dremio login failed on attempt #{retry}: {e}") + sleep(1) # Optional: exponential backoff can be implemented here + + raise DremioAPIException( + "Credentials cannot be refreshed. Please check your username and password." + ) + + def __get_sticky_headers(self) -> bool: + """ + Get authentication token and set headers. + Returns True if headers are set successfully, False otherwise. 
+ """ + if self._is_PAT: + if not self._password: + logger.error("Personal Access Token is missing.") + return False + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self._password}", + } + return True + else: + if not self.username or not self._password: + logger.error("Username and password are required for authentication.") + return False + try: + response = self._login( + headers={"Content-Type": "application/json"}, + data=json.dumps( + {"userName": self.username, "password": self._password} + ), + ) + token = response.get("token") + if not token: + logger.error("Authentication token not found in the response.") + return False + self.headers = { + "Content-Type": "application/json", + "Authorization": f"_dremio{token}", + } + return True + except Exception as e: + logger.error(f"Failed to obtain authentication headers: {e}") + return False + + def get(self, url: str) -> Dict: + """execute a get request on dremio""" + response = requests.get( + url=(self.base_url + url), + headers=self.headers, + verify=self._verify, + timeout=self._timeout, + ) + return response.json() + + def post(self, url: str, data: str) -> Dict: + """execute a get request on dremio""" + response = requests.post( + url=(self.base_url + url), + headers=self.headers, + data=data, + verify=self._verify, + timeout=self._timeout, + ) + return response.json() + + def _login(self, headers: Dict, data: str) -> Dict: + """execute a get request on dremio""" + response = requests.post( + url=f"{self.dremio_url}/apiv2/login", + headers=headers, + data=data, + verify=self._verify, + timeout=self._timeout, + ) + response.raise_for_status() + return response.json() + + def execute_query(self, query: str, timeout: int = 300) -> List[Dict[str, Any]]: + """Execute SQL query with timeout and error handling""" + try: + response = self.post(url="/sql", data=json.dumps({"sql": query})) + + if "errorMessage" in response: + raise RuntimeError(f"SQL Error: 
{response['errorMessage']}") + + job_id = response["id"] + + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(self.fetch_results, job_id) + try: + return future.result(timeout=timeout) + except concurrent.futures.TimeoutError: + self.cancel_query(job_id) + raise TimeoutError( + f"Query execution timed out after {timeout} seconds" + ) + + except requests.RequestException as e: + raise RuntimeError(f"Error executing query: {str(e)}") + + def fetch_results(self, job_id: str) -> List[Dict]: + """Fetch job results with status checking""" + start_time = time() + while True: + status = self.get_job_status(job_id) + if status["jobState"] == "COMPLETED": + break + elif status["jobState"] == "FAILED": + error_message = status.get("errorMessage", "Unknown error") + raise RuntimeError(f"Query failed: {error_message}") + elif status["jobState"] == "CANCELED": + raise RuntimeError("Query was canceled") + + if time() - start_time > 300: # 5 minutes timeout + self.cancel_query(job_id) + raise TimeoutError("Query execution timed out while fetching results") + + sleep(3) + + return self._fetch_all_results(job_id) + + def _fetch_all_results(self, job_id: str) -> List[Dict]: + """Fetch all results for a completed job""" + limit = 500 + offset = 0 + rows = [] + + while True: + result = self.get_job_result(job_id, offset, limit) + rows.extend(result["rows"]) + + offset = offset + limit + if offset >= result["rowCount"]: + break + + return rows + + def cancel_query(self, job_id: str) -> None: + """Cancel a running query""" + try: + self.post(url=f"/job/{job_id}/cancel", data=json.dumps({})) + except Exception as e: + logger.error(f"Failed to cancel query {job_id}: {str(e)}") + + def get_job_status(self, job_id: str) -> Dict[str, Any]: + """Check job status""" + return self.get( + url=f"/job/{job_id}/", + ) + + def get_job_result( + self, job_id: str, offset: int = 0, limit: int = 500 + ) -> Dict[str, Any]: + """Get job results in 
batches""" + return self.get( + url=f"/job/{job_id}/results?offset={offset}&limit={limit}", + ) + + def get_dataset_id(self, schema: str, dataset: str) -> Optional[str]: + """Retrieve the dataset ID based on schema and dataset name.""" + schema_split = schema.split(".") + schema_str = "" + last_val = 0 + + for increment_val in range(1, len(schema_split) + 1): + current_path = ".".join(schema_split[last_val:increment_val]) + url_encoded = quote(current_path, safe="") + response = self.get(url=f"/catalog/by-path/{schema_str}/{url_encoded}") + + if not response.get("errorMessage"): + last_val = increment_val + schema_str = ( + f"{schema_str}/{url_encoded}" if schema_str else url_encoded + ) + + dataset_response = self.get( + url=f"/catalog/by-path/{schema_str}/{quote(dataset, safe='')}", + ) + dataset_id = dataset_response.get("id") + if not dataset_id: + logger.error(f"Dataset ID not found for {schema}.{dataset}") + + return dataset_id + + def community_get_formatted_tables( + self, tables_and_columns: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + schema_list = [] + schema_dict_lookup = [] + dataset_list = [] + column_dictionary: Dict[str, List[Dict]] = defaultdict(list) + + for record in tables_and_columns: + if not record.get("COLUMN_NAME"): + continue + + table_full_path = record.get("FULL_TABLE_PATH") + if not table_full_path: + continue + + column_dictionary[table_full_path].append( + { + "name": record["COLUMN_NAME"], + "ordinal_position": record["ORDINAL_POSITION"], + "is_nullable": record["IS_NULLABLE"], + "data_type": record["DATA_TYPE"], + "column_size": record["COLUMN_SIZE"], + } + ) + + if record.get("TABLE_SCHEMA") not in schema_list: + schema_list.append(record.get("TABLE_SCHEMA")) + + distinct_tables_list = list( + { + tuple( + dictionary[key] + for key in ( + "TABLE_SCHEMA", + "TABLE_NAME", + "FULL_TABLE_PATH", + "VIEW_DEFINITION", + ) + if key in dictionary + ): dictionary + for dictionary in tables_and_columns + }.values() + ) + + for schema 
in schema_list: + schema_dict_lookup.append(self.validate_schema_format(schema)) + + for table, schemas in product(distinct_tables_list, schema_dict_lookup): + if table.get("TABLE_SCHEMA") == schemas.get("original_path"): + dataset_list.append( + { + "TABLE_SCHEMA": "[" + + ", ".join( + schemas.get("formatted_path") + [table.get("TABLE_NAME")] + ) + + "]", + "TABLE_NAME": table.get("TABLE_NAME"), + "COLUMNS": column_dictionary.get( + table.get("FULL_TABLE_PATH", "") + ), + "VIEW_DEFINITION": table.get("VIEW_DEFINITION"), + "RESOURCE_ID": self.get_dataset_id( + schema=".".join(schemas.get("formatted_path")), + dataset=table.get("TABLE_NAME", ""), + ), + "LOCATION_ID": self.get_dataset_id( + schema=".".join(schemas.get("formatted_path")), + dataset="", + ), + } + ) + + return dataset_list + + def get_all_tables_and_columns(self) -> List[Dict]: + if self.edition == DremioEdition.ENTERPRISE: + query_template = DremioSQLQueries.QUERY_DATASETS_EE + elif self.edition == DremioEdition.CLOUD: + query_template = DremioSQLQueries.QUERY_DATASETS_CLOUD + else: + query_template = DremioSQLQueries.QUERY_DATASETS_CE + + def get_pattern_condition( + patterns: Union[str, List[str]], field: str, allow: bool = True + ) -> str: + if not patterns: + return "" + + if isinstance(patterns, str): + patterns = [patterns.upper()] + + if ".*" in patterns and allow: + return "" + + patterns = [p.upper() for p in patterns if p != ".*"] + if not patterns: + return "" + + operator = "REGEXP_LIKE" if allow else "NOT REGEXP_LIKE" + pattern_str = "|".join(f"({p})" for p in patterns) + return f"AND {operator}({field}, '{pattern_str}')" + + schema_field = "CONCAT(REPLACE(REPLACE(REPLACE(UPPER(TABLE_SCHEMA), ', ', '.'), '[', ''), ']', ''))" + table_field = "UPPER(TABLE_NAME)" + + schema_condition = get_pattern_condition( + self.allow_schema_pattern, schema_field + ) + table_condition = get_pattern_condition(self.allow_dataset_pattern, table_field) + deny_schema_condition = get_pattern_condition( + 
self.deny_schema_pattern, schema_field, allow=False + ) + deny_table_condition = get_pattern_condition( + self.deny_dataset_pattern, table_field, allow=False + ) + + formatted_query = query_template.format( + schema_pattern=schema_condition, + table_pattern=table_condition, + deny_schema_pattern=deny_schema_condition, + deny_table_pattern=deny_table_condition, + ) + + all_tables_and_columns = self.execute_query(formatted_query) + tables = [] + + if self.edition == DremioEdition.COMMUNITY: + tables = self.community_get_formatted_tables(all_tables_and_columns) + + else: + column_dictionary: Dict[str, List[Dict]] = defaultdict(list) + + for record in all_tables_and_columns: + if not record.get("COLUMN_NAME"): + continue + + table_full_path = record.get("FULL_TABLE_PATH") + if not table_full_path: + continue + + column_dictionary[table_full_path].append( + { + "name": record["COLUMN_NAME"], + "ordinal_position": record["ORDINAL_POSITION"], + "is_nullable": record["IS_NULLABLE"], + "data_type": record["DATA_TYPE"], + "column_size": record["COLUMN_SIZE"], + } + ) + + distinct_tables_list = list( + { + tuple( + dictionary[key] + for key in ( + "TABLE_SCHEMA", + "TABLE_NAME", + "FULL_TABLE_PATH", + "VIEW_DEFINITION", + "LOCATION_ID", + "OWNER", + "OWNER_TYPE", + "CREATED", + "FORMAT_TYPE", + ) + if key in dictionary + ): dictionary + for dictionary in all_tables_and_columns + }.values() + ) + + for table in distinct_tables_list: + tables.append( + { + "TABLE_NAME": table.get("TABLE_NAME"), + "TABLE_SCHEMA": table.get("TABLE_SCHEMA"), + "COLUMNS": column_dictionary[table["FULL_TABLE_PATH"]], + "VIEW_DEFINITION": table.get("VIEW_DEFINITION"), + "RESOURCE_ID": table.get("RESOURCE_ID"), + "LOCATION_ID": table.get("LOCATION_ID"), + "OWNER": table.get("OWNER"), + "OWNER_TYPE": table.get("OWNER_TYPE"), + "CREATED": table.get("CREATED"), + "FORMAT_TYPE": table.get("FORMAT_TYPE"), + } + ) + + return tables + + def validate_schema_format(self, schema): + + if "." 
in schema: + schema_path = self.get( + url=f"/catalog/{self.get_dataset_id(schema=schema, dataset='')}" + ).get("path") + return {"original_path": schema, "formatted_path": schema_path} + return {"original_path": schema, "formatted_path": [schema]} + + def test_for_enterprise_edition(self): + response = requests.get( + url=f"{self.base_url}/catalog/privileges", + headers=self.headers, + verify=self._verify, + timeout=self._timeout, + ) + + if response.status_code == 200: + return True + + return False + + def get_view_parents(self, dataset_id: str) -> List: + parents_list = [] + + if self.edition == DremioEdition.ENTERPRISE: + parents = self.get( + url=f"/catalog/{dataset_id}/graph", + ).get("parents") + + if not parents: + return [] + + for parent in parents: + parents_list.append(".".join(parent.get("path"))) + + return parents_list + + def extract_all_queries(self) -> List[Dict[str, Any]]: + if self.edition == DremioEdition.CLOUD: + jobs_query = DremioSQLQueries.QUERY_ALL_JOBS_CLOUD + else: + jobs_query = DremioSQLQueries.QUERY_ALL_JOBS + + return self.execute_query(query=jobs_query) + + def get_source_by_id(self, source_id: str) -> Optional[Dict]: + """ + Fetch source details by ID. + """ + response = self.get( + url=f"/source/{source_id}", + ) + return response if response else None + + def get_source_for_dataset(self, schema: str, dataset: str) -> Optional[Dict]: + """ + Get source information for a dataset given its schema and name. + """ + dataset_id = self.get_dataset_id(schema, dataset) + if not dataset_id: + return None + + catalog_entry = self.get( + url=f"/catalog/{dataset_id}", + ) + if not catalog_entry or "path" not in catalog_entry: + return None + + source_id = catalog_entry["path"][0] + return self.get_source_by_id(source_id) + + def get_tags_for_resource(self, resource_id: str) -> Optional[List[str]]: + """ + Get Dremio tags for a given resource_id. 
+ """ + + try: + tags = self.get( + url=f"/catalog/{resource_id}/collaboration/tag", + ) + return tags.get("tags") + except Exception as exc: + logging.info( + "Resource ID {} has no tags: {}".format( + resource_id, + exc, + ) + ) + return None + + def get_description_for_resource(self, resource_id: str) -> Optional[str]: + """ + Get Dremio wiki entry for a given resource_id. + """ + + try: + tags = self.get( + url=f"/catalog/{resource_id}/collaboration/wiki", + ) + return tags.get("text") + except Exception as exc: + logging.info( + "Resource ID {} has no wiki entry: {}".format( + resource_id, + exc, + ) + ) + return None + + def get_source_type( + self, + dremio_source_type: str, + datahub_source_type: Optional[str], + ) -> Optional[str]: + """ + Get the DataHub source type for a Dremio source type, registering datahub_source_type as its mapping when the lookup returns nothing. + """ + + lookup_datahub_source_type = ( + self.dremio_to_datahub_source_mapper.get_datahub_source_type( + dremio_source_type=dremio_source_type, + ) + ) + + if lookup_datahub_source_type: + return lookup_datahub_source_type + + self.dremio_to_datahub_source_mapper.add_mapping( + dremio_source_type=dremio_source_type, + datahub_source_type=datahub_source_type, + ) + return datahub_source_type + + def get_source_category( + self, + dremio_source_type: str, + ) -> Optional[str]: + """ + Get the source category for a Dremio source type from the source mapper. 
+ """ + + return self.dremio_to_datahub_source_mapper.get_category( + source_type=dremio_source_type, + ) + + def get_containers_for_location( + self, resource_id: str, path: List[str] + ) -> List[Dict[str, str]]: + containers = [] + + def traverse_path(location_id: str, entity_path: List[str]) -> List: + nonlocal containers + try: + response = self.get(url=f"/catalog/{location_id}") + + if response.get("entityType") == "folder": + containers.append( + { + "id": location_id, + "name": entity_path[-1], + "path": entity_path[:-1], + "container_type": "FOLDER", + } + ) + + for container in response.get("children", []): + if container.get("type") == "CONTAINER": + traverse_path(container.get("id"), container.get("path")) + + except Exception as exc: + logging.info( + "Location {} contains no tables or views. Skipping...".format(location_id) + ) + logging.info("Error message: {}".format(exc)) + + return containers + + return traverse_path(location_id=resource_id, entity_path=path) + + def get_all_containers(self): + """ + Query the Dremio sources API and return source information. 
+ """ + containers = [] + + response = self.get(url="/catalog") + + def process_source(source): + if source.get("containerType") == "SOURCE": + source_config = self.get( + url=f"/catalog/{source.get('id')}", + ) + + if source_config.get("config", {}).get("database"): + db = source_config.get("config", {}).get("database") + else: + db = source_config.get("config", {}).get("databaseName", "") + + return { + "id": source.get("id"), + "name": source.get("path")[0], + "path": [], + "container_type": "SOURCE", + "source_type": source_config.get("type"), + "root_path": source_config.get("config", {}).get("rootPath"), + "database_name": db, + } + else: + return { + "id": source.get("id"), + "name": source.get("path")[0], + "path": [], + "container_type": "SPACE", + } + + def process_source_and_containers(source): + container = process_source(source) + sub_containers = self.get_containers_for_location( + resource_id=container.get("id"), + path=[container.get("name")], + ) + return [container] + sub_containers + + # Use ThreadPoolExecutor to parallelize the processing of sources + with concurrent.futures.ThreadPoolExecutor( + max_workers=self._max_workers + ) as executor: + future_to_source = { + executor.submit(process_source_and_containers, source): source + for source in response.get("data", []) + } + + for future in concurrent.futures.as_completed(future_to_source): + containers.extend(future.result()) + + return containers diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py new file mode 100644 index 0000000000000..77d136d44ec68 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py @@ -0,0 +1,561 @@ +"""This Module contains controller functions for dremio source""" + +__author__ = "Shabbir Mohammed Hussain, Shehroz Abdullah, Hamza Rehman, Jonny Dixon" + +import logging +import time +import uuid +from datetime import datetime 
+from typing import Dict, Iterable, List, Optional, Tuple, Type, Union + +from datahub._codegen.aspect import _Aspect +from datahub.emitter.mce_builder import ( + make_dataplatform_instance_urn, + make_domain_urn, + make_group_urn, + make_user_urn, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import ContainerKey +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.dremio.dremio_entities import ( + DremioContainer, + DremioDataset, + DremioDatasetColumn, + DremioDatasetType, + DremioGlossaryTerm, +) +from datahub.ingestion.source.dremio.dremio_profiling import DremioProfiler +from datahub.metadata.schema_classes import ( + ArrayTypeClass, + AuditStampClass, + BooleanTypeClass, + BrowsePathEntryClass, + BrowsePathsV2Class, + BytesTypeClass, + ContainerClass, + ContainerPropertiesClass, + DataPlatformInstanceClass, + DatasetFieldProfileClass, + DatasetProfileClass, + DatasetPropertiesClass, + DateTypeClass, + DomainsClass, + FabricTypeClass, + GlossaryTermAssociationClass, + GlossaryTermInfoClass, + GlossaryTermsClass, + MySqlDDLClass, + NullTypeClass, + NumberTypeClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, + QuantileClass, + RecordTypeClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StatusClass, + StringTypeClass, + SubTypesClass, + TimeStampClass, + TimeTypeClass, + ViewPropertiesClass, +) + +logger = logging.getLogger(__name__) +namespace = uuid.NAMESPACE_DNS + + +class DremioContainerKey(ContainerKey): + key: str + + +class SchemaFieldTypeMapper: + + FIELD_TYPE_MAPPING: Dict[str, Type] = { + # Bool + "boolean": BooleanTypeClass, + # Binary + "binary varying": BytesTypeClass, + # Numbers + "decimal": NumberTypeClass, + "integer": NumberTypeClass, + "bigint": NumberTypeClass, + "float": NumberTypeClass, + "double": NumberTypeClass, + # Dates and times + "timestamp": DateTypeClass, + "date": DateTypeClass, + "time": 
TimeTypeClass, + # Strings + "char": StringTypeClass, + "character": StringTypeClass, + "character varying": StringTypeClass, + # Records + "row": RecordTypeClass, + "struct": RecordTypeClass, + "list": RecordTypeClass, + "map": RecordTypeClass, + # Arrays + "array": ArrayTypeClass, + } + + @classmethod + def get_field_type( + cls, data_type: str, data_size: Optional[int] = None + ) -> Tuple["SchemaFieldDataTypeClass", str]: + """ + Maps a Dremio data type and size to a DataHub SchemaFieldDataTypeClass and native data type string. + + :param data_type: The data type string from Dremio. + :param data_size: The size of the data type, if applicable. + :return: A tuple containing a SchemaFieldDataTypeClass instance and the native data type string. + """ + if not data_type: + logger.warning("Empty data_type provided, defaulting to NullTypeClass.") + type_class = NullTypeClass + native_data_type = "NULL" + else: + data_type = data_type.lower() + type_class = cls.FIELD_TYPE_MAPPING.get(data_type, NullTypeClass) + + if data_size: + native_data_type = f"{data_type}({data_size})" + else: + native_data_type = data_type + + try: + schema_field_type = SchemaFieldDataTypeClass(type=type_class()) + logger.debug( + f"Mapped data_type '{data_type}' with size '{data_size}' to type class " + f"'{type_class.__name__}' and native data type '{native_data_type}'." 
+ ) + except Exception as e: + logger.error( + f"Error initializing SchemaFieldDataTypeClass with type '{type_class.__name__}': {e}" + ) + schema_field_type = SchemaFieldDataTypeClass(type=NullTypeClass()) + + return schema_field_type, native_data_type + + +class DremioAspects: + def __init__( + self, + platform: str, + profiler: DremioProfiler, + base_url: str, + domain: Optional[str] = None, + platform_instance: Optional[str] = None, + env: Optional[Union[FabricTypeClass, str]] = FabricTypeClass.PROD, + profiling_enabled: bool = False, + ): + self.platform = platform + self.platform_instance = platform_instance + self.env = env + self.domain = domain + self.profiler = profiler + self.profiling_enabled = profiling_enabled + self.base_url = base_url + + def get_container_key( + self, name: str, path: Optional[List[str]] + ) -> DremioContainerKey: + return DremioContainerKey( + platform=self.platform, + instance=self.platform_instance, + env=str(self.env), + key="".join(path) + name if path else name, + ) + + def get_container_urn(self, name: str, path: Optional[List[str]]) -> str: + container_key = self.get_container_key(name, path) + return container_key.as_urn() + + def create_domain_aspect(self) -> Optional[_Aspect]: + if self.domain: + if self.domain.startswith("urn:li:domain:"): + return DomainsClass(domains=[self.domain]) + return DomainsClass( + domains=[ + make_domain_urn( + str(uuid.uuid5(namespace, self.domain)), + ) + ] + ) + return None + + def populate_container_mcp( + self, container_urn: str, container: DremioContainer + ) -> Iterable[MetadataWorkUnit]: + # Container Properties + container_properties = self._create_container_properties(container) + mcp = MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=container_properties, + aspectName=ContainerPropertiesClass.ASPECT_NAME, + ) + yield mcp.as_workunit() + + # Browse Paths V2 + browse_paths_v2 = self._create_browse_paths(container) + if browse_paths_v2: + mcp = 
MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=browse_paths_v2, + ) + yield mcp.as_workunit() + + # Container Class + container_class = self._create_container_class(container) + if container_class: + mcp = MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=container_class, + ) + yield mcp.as_workunit() + + # Data Platform Instance + data_platform_instance = self._create_data_platform_instance() + if data_platform_instance: + mcp = MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=data_platform_instance, + ) + yield mcp.as_workunit() + + # SubTypes + subtypes = SubTypesClass(typeNames=[container.subclass]) + mcp = MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=subtypes, + ) + yield mcp.as_workunit() + + # Status + status = StatusClass(removed=False) + mcp = MetadataChangeProposalWrapper( + entityUrn=container_urn, + aspect=status, + ) + yield mcp.as_workunit() + + def populate_dataset_mcp( + self, dataset_urn: str, dataset: DremioDataset + ) -> Iterable[MetadataWorkUnit]: + # Dataset Properties + dataset_properties = self._create_dataset_properties(dataset) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=dataset_properties, + ) + yield mcp.as_workunit() + + # Ownership + ownership = self._create_ownership(dataset) + if ownership: + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=ownership, + ) + yield mcp.as_workunit() + + # SubTypes + subtypes = SubTypesClass(typeNames=[dataset.dataset_type.value]) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=subtypes, + ) + yield mcp.as_workunit() + + # Data Platform Instance + data_platform_instance = self._create_data_platform_instance() + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=data_platform_instance, + ) + yield mcp.as_workunit() + + # Browse Paths V2 + browse_paths_v2 = self._create_browse_paths(dataset) + if browse_paths_v2: + mcp = 
MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=browse_paths_v2, + ) + yield mcp.as_workunit() + + # Container Class + container_class = self._create_container_class(dataset) + if container_class: + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=container_class, + ) + yield mcp.as_workunit() + + # Glossary Terms + if dataset.glossary_terms: + glossary_terms = self._create_glossary_terms(dataset) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=glossary_terms, + ) + yield mcp.as_workunit() + + # Schema Metadata + if dataset.columns: + schema_metadata = self._create_schema_metadata(dataset) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=schema_metadata, + ) + yield mcp.as_workunit() + + if self.profiling_enabled: + profile_data = dataset.get_profile_data(self.profiler) + profile_aspect = self.populate_profile_aspect(profile_data) + if profile_aspect: + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=profile_aspect, + ) + yield mcp.as_workunit() + else: + logger.warning( + f"Dataset {dataset.path}.{dataset.resource_name} has not been queried in Dremio" + ) + logger.warning( + f"Dataset {dataset.path}.{dataset.resource_name} will have a null schema" + ) + + # Status + status = StatusClass(removed=False) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=status, + ) + yield mcp.as_workunit() + + def populate_glossary_term_mcp( + self, glossary_term: DremioGlossaryTerm + ) -> Iterable[MetadataWorkUnit]: + glossary_term_info = self._create_glossary_term_info(glossary_term) + mcp = MetadataChangeProposalWrapper( + entityUrn=glossary_term.urn, + aspect=glossary_term_info, + ) + yield mcp.as_workunit() + + def populate_profile_aspect(self, profile_data: Dict) -> DatasetProfileClass: + field_profiles = [ + self._create_field_profile(field_name, field_stats) + for field_name, field_stats in profile_data.get("column_stats", {}).items() + ] + 
return DatasetProfileClass( + timestampMillis=round(time.time() * 1000), + rowCount=profile_data.get("row_count"), + columnCount=profile_data.get("column_count"), + fieldProfiles=field_profiles, + ) + + def _create_container_properties( + self, container: DremioContainer + ) -> ContainerPropertiesClass: + return ContainerPropertiesClass( + name=container.container_name, + qualifiedName=f"{'.'.join(container.path) + '.' if container.path else ''}{container.container_name}", + description=container.description, + env=self.env, + ) + + def _create_browse_paths( + self, entity: Union[DremioContainer, DremioDataset] + ) -> Optional[BrowsePathsV2Class]: + paths = [] + + if self.platform_instance: + paths.append( + BrowsePathEntryClass( + id=self.platform_instance, + ) + ) + + if entity.path: + for browse_path_level in range(len(entity.path)): + paths.append( + BrowsePathEntryClass( + id=entity.path[browse_path_level], + urn=self.get_container_urn( + name=entity.container_name + if hasattr(entity, "container_name") + else "", + path=entity.path[: browse_path_level + 1], + ), + ) + ) + + if paths: + return BrowsePathsV2Class(path=paths) + return None + + def _create_container_class( + self, entity: Union[DremioContainer, DremioDataset] + ) -> Optional[ContainerClass]: + if entity.path: + return ContainerClass( + container=self.get_container_urn( + path=entity.path, + name="", + ) + ) + return None + + def _create_data_platform_instance(self) -> DataPlatformInstanceClass: + return DataPlatformInstanceClass( + platform=f"urn:li:dataPlatform:{self.platform}", + instance=( + make_dataplatform_instance_urn(self.platform, self.platform_instance) + if self.platform_instance + else None + ), + ) + + def _create_dataset_properties( + self, dataset: DremioDataset + ) -> DatasetPropertiesClass: + return DatasetPropertiesClass( + name=dataset.resource_name, + qualifiedName=f"{'.'.join(dataset.path)}.{dataset.resource_name}", + description=dataset.description, + 
externalUrl=self._create_external_url(dataset=dataset), + created=TimeStampClass( + time=round( + datetime.strptime( + dataset.created, "%Y-%m-%d %H:%M:%S.%f" + ).timestamp() + * 1000 + ) + if hasattr(dataset, "created") + else 0, + ), + ) + + def _create_external_url(self, dataset: DremioDataset) -> str: + container_type = "source" + dataset_url_path = '"' + dataset.path[0] + '"/' + + if len(dataset.path) > 1: + dataset_url_path = ( + dataset_url_path + '"' + '"."'.join(dataset.path[1:]) + '".' + ) + + if dataset.dataset_type == DremioDatasetType.VIEW: + container_type = "space" + elif dataset.path[0].startswith("@"): + container_type = "home" + + return f'{self.base_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"' + + def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]: + if not dataset.owner: + return None + owner = ( + make_user_urn(dataset.owner) + if dataset.owner_type == "USER" + else make_group_urn(dataset.owner) + ) + return OwnershipClass( + owners=[ + OwnerClass( + owner=owner, + type=OwnershipTypeClass.TECHNICAL_OWNER, + ) + ] + ) + + def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass: + return GlossaryTermsClass( + terms=[ + GlossaryTermAssociationClass(urn=term.urn) + for term in entity.glossary_terms + ], + auditStamp=AuditStampClass( + time=round(time.time() * 1000), + actor="urn:li:corpuser:admin", + ), + ) + + def _create_schema_metadata(self, dataset: DremioDataset) -> SchemaMetadataClass: + return SchemaMetadataClass( + schemaName=f"{'.'.join(dataset.path)}.{dataset.resource_name}", + platform=f"urn:li:dataPlatform:{self.platform}", + version=0, + fields=[self._create_schema_field(column) for column in dataset.columns], + platformSchema=MySqlDDLClass(""), + hash="", + ) + + def _create_schema_field(self, column: DremioDatasetColumn) -> SchemaFieldClass: + type_class, native_data_type = SchemaFieldTypeMapper.get_field_type( + column.data_type, + column.column_size, + ) + 
return SchemaFieldClass( + fieldPath=column.name, + type=type_class, + nativeDataType=native_data_type, + nullable=column.is_nullable == "YES", + ) + + def _get_profile_data(self, dataset: DremioDataset) -> Dict: + return self.profiler.profile_table( + f"{'.'.join(dataset.path)}.{dataset.resource_name}", + [(col.name, col.data_type) for col in dataset.columns], + ) + + def _create_view_properties( + self, dataset: DremioDataset + ) -> Optional[ViewPropertiesClass]: + if not dataset.sql_definition: + return None + return ViewPropertiesClass( + materialized=False, + viewLanguage="SQL", + viewLogic=dataset.sql_definition, + ) + + def _create_glossary_term_info( + self, glossary_term: DremioGlossaryTerm + ) -> GlossaryTermInfoClass: + return GlossaryTermInfoClass( + definition="", + termSource=self.platform, + name=glossary_term.glossary_term, + ) + + def _create_field_profile( + self, field_name: str, field_stats: Dict + ) -> DatasetFieldProfileClass: + quantiles = field_stats.get("quantiles") + return DatasetFieldProfileClass( + fieldPath=field_name, + uniqueCount=field_stats.get("distinct_count"), + nullCount=field_stats.get("null_count"), + min=str(field_stats.get("min")) if field_stats.get("min") is not None else None, + max=str(field_stats.get("max")) if field_stats.get("max") is not None else None, + mean=str(field_stats.get("mean")) if field_stats.get("mean") is not None else None, + median=str(field_stats.get("median")) + if field_stats.get("median") is not None + else None, + stdev=str(field_stats.get("stdev")) if field_stats.get("stdev") is not None else None, + quantiles=[ + QuantileClass(quantile=str(0.25), value=str(quantiles[0])), + QuantileClass(quantile=str(0.75), value=str(quantiles[1])), + ] + if quantiles + else None, + sampleValues=field_stats.get("sample_values"), + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py new file mode 100644 index 0000000000000..3ac18f6e1113e --- /dev/null +++ 
b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py @@ -0,0 +1,197 @@ +import os +from typing import List, Literal, Optional + +import certifi +from pydantic import Field, validator + +from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, +) +from datahub.metadata.schema_classes import FabricTypeClass + + +class ProfileConfig(GEProfilingConfig): + partition_profiling_enabled: bool = Field( + default=False, + description="Partition profiling disabled for Dremio.", + ) + include_field_median_value: bool = Field( + default=False, + description="Median causes a number of issues in Dremio.", + ) + query_timeout: int = Field( + default=300, description="Time before cancelling Dremio profiling query" + ) + + row_count: bool = True + column_count: bool = True + sample_values: bool = True + + +class DremioSourceMapping(ConfigModel): + platform: Optional[str] = Field( + default=None, + description="Source connection made by Dremio (e.g. S3, Snowflake)", + ) + platform_name: Optional[str] = Field( + default=None, + description="Alias of platform in Dremio connection", + ) + platform_instance: Optional[str] = Field( + default=None, + description="Platform instance of source connection in Datahub", + ) + dremio_source_type: Optional[str] = Field( + default=None, + description="Source connection made by Dremio (e.g. S3, Snowflake)", + ) + env: Optional[str] = Field( + default=FabricTypeClass.PROD, + description="ENV in Datahub of source connection made by Dremio (e.g. 
PROD)", + ) + root_path: Optional[str] = Field( + default=None, + description="Root path of source - Extracted from Dremio API", + hidden_from_docs=True, + ) + database_name: Optional[str] = Field( + default=None, + description="Database of source - Extracted from Dremio API", + hidden_from_docs=True, + ) + + +class DremioSourceConfig(ConfigModel, StatefulIngestionConfigBase): + + # Dremio Connection Details + hostname: Optional[str] = Field( + default=None, + description="Hostname or IP Address of the Dremio server", + ) + + port: int = Field( + default=9047, + description="Port of the Dremio REST API", + ) + + username: Optional[str] = Field( + default=None, + description="Dremio username", + ) + + authentication_method: Optional[str] = Field( + default="password", + description="Authentication method: 'password' or 'PAT' (Personal Access Token)", + ) + + password: Optional[str] = Field( + default=None, + description="Dremio password or Personal Access Token", + ) + + tls: bool = Field( + default=True, + description="Whether the Dremio REST API port is encrypted", + ) + + disable_certificate_verification: Optional[bool] = Field( + default=False, + description="Disable TLS certificate verification", + ) + + path_to_certificates: str = Field( + default=certifi.where(), + description="Path to SSL certificates", + ) + + # Dremio Cloud specific configs + is_dremio_cloud: Optional[bool] = Field( + default=False, + description="Whether this is a Dremio Cloud instance", + ) + dremio_cloud_region: Literal["US", "EMEA"] = Field( + default="US", + description="Dremio Cloud region ('US' or 'EMEA')", + ) + + # DataHub Environment details + env: str = Field( + default=FabricTypeClass.PROD, + description="Environment to use in namespace when constructing URNs.", + ) + + platform_instance: Optional[str] = Field( + default=None, + description="The instance of the platform that all assets produced by this recipe belong to. " + "This should be unique within the platform. 
" + "See https://datahubproject.io/docs/platform-instances/ for more details.", + ) + + domain: Optional[str] = Field( + default=None, + description="Domain for all source objects.", + ) + source_mappings: Optional[List[DremioSourceMapping]] = Field( + default=None, + description="Mappings from Dremio sources to DataHub platforms and datasets.", + ) + + # Entity Filters + schema_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for schemas to filter", + ) + dataset_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for schemas to filter", + ) + + # Profiling + profile_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for tables to profile", + ) + profiling: ProfileConfig = Field( + default=ProfileConfig(), + description="Configuration for profiling", + ) + + # Advanced Configs + max_workers: int = Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for parallel processing", + ) + + include_query_lineage: bool = Field( + default=False, + description="Whether to include query-based lineage information.", + ) + + include_table_rename_lineage: bool = Field( + default=True, + description="Whether to include table rename lineage", + ) + + include_copy_lineage: bool = Field( + default=True, + description="Whether to include copy lineage", + ) + + @validator("authentication_method") + def validate_auth_method(cls, value): + allowed_methods = ["password", "PAT"] + if value not in allowed_methods: + raise ValueError( + f"authentication_method must be one of {allowed_methods}", + ) + return value + + @validator("password") + def validate_password(cls, value, values): + if values.get("authentication_method") == "PAT" and not value: + raise ValueError( + "Password (Personal Access Token) is required when using PAT authentication", + ) + return value diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py new file mode 100644 index 0000000000000..70a61d40bda3a --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py @@ -0,0 +1,97 @@ +""" + Dremio source type to Datahub source type. +""" + + +class DremioToDataHubSourceTypeMapping: + """ + Dremio source type to the Datahub source type mapping. + """ + + def __init__(self): + self._source_type_mapping = { + # Dremio source types + "ADL": "abfs", + "AMAZONELASTIC": "elasticsearch", + "AWSGLUE": "glue", + "AZURE_STORAGE": "abfs", + "DB2": "db2", + "DREMIOTODREMIO": "dremio", + "ELASTIC": "elasticsearch", + "GCS": "gcs", + "HDFS": "s3", + "HIVE": "hive", + "HIVE3": "hive", + "MONGO": "mongodb", + "MSSQL": "mssql", + "MYSQL": "mysql", + "NAS": "s3", + "NESSIE": "iceberg", + "ORACLE": "oracle", + "POSTGRES": "postgres", + "REDSHIFT": "redshift", + "S3": "s3", + "SNOWFLAKE": "snowflake", + "SYNAPSE": "mssql", + "TERADATA": "teradata", + } + + self._database_source_types = { + "AMAZONELASTIC", + "AWSGLUE", + "AZURE_STORAGE", + "DB2", + "DREMIOTODREMIO", + "ELASTIC", + "HIVE", + "HIVE3", + "MONGO", + "MSSQL", + "MYSQL", + "NESSIE", + "ORACLE", + "POSTGRES", + "REDSHIFT", + "SNOWFLAKE", + "SYNAPSE", + "TERADATA", + } + + self._file_object_storage_types = { + "ADL", + "AZURE_STORAGE", + "GCS", + "HDFS", + "NAS", + "S3", + } + + def get_datahub_source_type(self, dremio_source_type): + """ + Return the datahub source type. 
+ """ + return self._source_type_mapping.get( + dremio_source_type.upper(), dremio_source_type.lower() + ) + + def get_category(self, source_type): + """ + Define whether source uses dot notation (DB) or slash notation (Object storage) + """ + if source_type.upper() in self._database_source_types: + return "database" + if source_type.upper() in self._file_object_storage_types: + return "file_object_storage" + return "unknown" + + def add_mapping(self, dremio_source_type, datahub_source_type, category=None): + """ + Add new source type if not in map (e.g. Dremio ARP) + """ + dremio_source_type = dremio_source_type.upper() + self._source_type_mapping[dremio_source_type] = datahub_source_type + if category: + if category.lower() == "file_object_storage": + self._file_object_storage_types.add(dremio_source_type) + else: + self._database_source_types.add(dremio_source_type) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py new file mode 100644 index 0000000000000..f09055fe2fbc7 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py @@ -0,0 +1,452 @@ +"""This Module contains utility functions for dremio source""" +import itertools +import logging +import re +import uuid +from collections import deque +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from typing import Any, Deque, Dict, List, Optional + +from sqlglot import parse_one + +from datahub.emitter.mce_builder import make_term_urn +from datahub.ingestion.source.dremio.dremio_api import ( + DremioAPIOperations, + DremioEdition, +) +from datahub.ingestion.source.dremio.dremio_profiling import DremioProfiler + +logger = logging.getLogger(__name__) + + +QUERY_TYPES = { + "DML": ["CREATE", "DELETE", "INSERT", "MERGE", "UPDATE"], + "DDL": [ + "ALTER BRANCH", + "ALTER PIPE", + "ALTER SOURCE", + "ALTER TABLE", + "ALTER TAG", + 
"ALTER VIEW", + "ANALYZE TABLE", + "COPY INTO", + "CREATE BRANCH", + "CREATE FOLDER", + "CREATE PIPE", + "CREATE ROLE", + "CREATE TABLE", + "CREATE TAG", + "CREATE USER", + "CREATE VIEW", + "DESCRIBE PIPE", + "DESCRIBE TABLE", + "DROP BRANCH", + "DROP FOLDER", + "DROP PIPE", + "DROP ROLE", + "DROP TABLE", + "DROP TAG", + "DROP USER", + "DROP VIEW", + "GRANT ROLE", + "GRANT TO ROLE", + "GRANT TO USER", + "MERGE BRANCH", + "REVOKE FROM ROLE", + "REVOKE FROM USER", + "REVOKE ROLE", + "SHOW BRANCHES", + "SHOW CREATE TABLE", + "SHOW CREATE VIEW", + "SHOW LOGS", + "SHOW TABLES", + "SHOW TAGS", + "SHOW VIEWS", + "USE", + "VACUUM CATALOG", + "VACUUM TABLE", + ], + "SELECT": ["SELECT", "WITH"], + "DATA_MANIPULATION": ["INSERT INTO", "MERGE INTO", "CREATE TABLE"], +} + + +class DremioDatasetType(Enum): + VIEW = "View" + TABLE = "Table" + + +class DremioGlossaryTerm: + urn: str + glossary_term: str + + def __init__(self, glossary_term: str): + self.urn = self.set_glossary_term( + glossary_term=glossary_term, + ) + self.glossary_term = glossary_term + + def set_glossary_term(self, glossary_term: str) -> str: + namespace = uuid.NAMESPACE_DNS + + return make_term_urn(term=str(uuid.uuid5(namespace, glossary_term))) + + +@dataclass +class DremioDatasetColumn: + name: str + ordinal_position: int + data_type: str + column_size: int + is_nullable: bool + + +class DremioQuery: + query_without_comments: str + query_type: str + query_subtype: str + affected_dataset: str + + def __init__( + self, + job_id: str, + username: str, + submitted_ts: str, + query: str, + queried_datasets: str, + affected_datasets: Optional[str] = None, + ): + self.job_id = job_id + self.username = username + self.submitted_ts = self._get_submitted_ts(submitted_ts) + self.query = self._get_query(query) + self.query_without_comments = self.get_raw_query(query) + self.query_type = self._get_query_type() + self.query_subtype = self._get_query_subtype() + self.queried_datasets = 
self._get_queried_datasets(queried_datasets) + if affected_datasets: + self.affected_dataset = affected_datasets + else: + self.affected_dataset = self._get_affected_tables() + + def get(self, attr): + return getattr(self, attr, None) + + def _get_submitted_ts(self, timestamp: str) -> datetime: + return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f") + + def _get_query(self, query: str) -> str: + return str(query).replace("'", "'") + + def _get_query_type(self) -> str: + query_operator = re.split( + pattern=r"\s+", + string=self.query_without_comments.strip(), + maxsplit=1, + )[0] + + if query_operator in QUERY_TYPES["SELECT"]: + return "SELECT" + if query_operator in QUERY_TYPES["DML"]: + return "DML" + return "DDL" + + def _get_query_subtype(self) -> str: + for query_operator in ( + QUERY_TYPES["SELECT"] + QUERY_TYPES["DML"] + QUERY_TYPES["DDL"] + ): + if self.query_without_comments.upper().startswith(query_operator): + return query_operator + return "UNDEFINED" + + def _get_queried_datasets(self, queried_datasets: str) -> List[str]: + return list( + {dataset.strip() for dataset in queried_datasets.strip("[]").split(",")} + ) + + def _get_affected_tables(self) -> str: + # TO DO + # for manipulation_operator in _data_manipulation_queries: + # if self.query_without_comments.upper().startswith(manipulation_operator): + + return "" + + def get_raw_query(self, sql_query: str) -> str: + parsed = parse_one(sql_query) + return parsed.sql(comments=False) + + +class DremioDataset: + resource_id: str + resource_name: str + path: List[str] + location_id: str + columns: List[DremioDatasetColumn] + sql_definition: Optional[str] + dataset_type: DremioDatasetType + owner: Optional[str] + owner_type: Optional[str] + created: str + parents: Optional[List[str]] + description: Optional[str] + format_type: Optional[str] + glossary_terms: List[DremioGlossaryTerm] = [] + + def __init__( + self, + dataset_details: Dict[str, Any], + api_operations: DremioAPIOperations, + ): + 
self.glossary_terms: List[DremioGlossaryTerm] = [] + self.resource_id = dataset_details.get("RESOURCE_ID", "") + self.resource_name = dataset_details.get("TABLE_NAME", "") + self.path = dataset_details.get("TABLE_SCHEMA", "")[1:-1].split(", ")[:-1] + self.location_id = dataset_details.get("LOCATION_ID", "") + # self.columns = dataset_details.get("COLUMNS", []) + # Initialize DremioDatasetColumn instances for each column + self.columns = [ + DremioDatasetColumn( + name=col.get("name"), + ordinal_position=col.get("ordinal_position"), + is_nullable=col.get("is_nullable", False), + data_type=col.get("data_type"), + column_size=col.get("column_size"), + ) + for col in dataset_details.get("COLUMNS", []) + ] + + self.sql_definition = dataset_details.get("VIEW_DEFINITION") + + if self.sql_definition: + self.dataset_type = DremioDatasetType.VIEW + else: + self.dataset_type = DremioDatasetType.TABLE + + if api_operations.edition in ( + DremioEdition.ENTERPRISE, + DremioEdition.CLOUD, + ): + self.created = dataset_details.get("CREATED", "") + self.owner = dataset_details.get("OWNER") + self.owner_type = dataset_details.get("OWNER_TYPE") + self.format_type = dataset_details.get("FORMAT_TYPE") + + self.description = api_operations.get_description_for_resource( + resource_id=self.resource_id + ) + + glossary_terms = api_operations.get_tags_for_resource( + resource_id=self.resource_id + ) + if glossary_terms is not None: + for glossary_term in glossary_terms: + self.glossary_terms.append( + DremioGlossaryTerm(glossary_term=glossary_term) + ) + + if self.sql_definition and api_operations.edition == DremioEdition.ENTERPRISE: + self.parents = api_operations.get_view_parents( + dataset_id=self.resource_id, + ) + + def get_profile_data(self, profiler: DremioProfiler) -> Dict: + full_table_name = '"' + '"."'.join(self.path) + '"."' + self.resource_name + '"' + columns = [(col.name, col.data_type) for col in self.columns] + return profiler.profile_table(full_table_name, columns) + + 
+class DremioContainer: + container_name: str + location_id: str + path: List[str] + description: Optional[str] + subclass: str + + def __init__( + self, + container_name: str, + location_id: str, + path: List[str], + api_operations: DremioAPIOperations, + ): + self.container_name = container_name + self.location_id = location_id + self.path = path + + self.description = api_operations.get_description_for_resource( + resource_id=location_id, + ) + + +class DremioSource(DremioContainer): + subclass: str = "Dremio Source" + dremio_source_type: str + root_path: Optional[str] + database_name: Optional[str] + + def __init__( + self, + container_name: str, + location_id: str, + path: List[str], + api_operations: DremioAPIOperations, + dremio_source_type: str, + root_path: Optional[str] = None, + database_name: Optional[str] = None, + ): + super().__init__( + container_name=container_name, + location_id=location_id, + path=path, + api_operations=api_operations, + ) + self.dremio_source_type = dremio_source_type + self.root_path = root_path + self.database_name = database_name + + +class DremioSpace(DremioContainer): + subclass: str = "Dremio Space" + + +class DremioFolder(DremioContainer): + subclass: str = "Dremio Folder" + + +class DremioCatalog: + dremio_api: DremioAPIOperations + edition: DremioEdition + + def __init__(self, dremio_api: DremioAPIOperations): + self.dremio_api = dremio_api + self.edition = dremio_api.edition + self.datasets: Deque[DremioDataset] = deque() + self.sources: Deque[DremioSource] = deque() + self.spaces: Deque[DremioSpace] = deque() + self.folders: Deque[DremioFolder] = deque() + self.glossary_terms: Deque[DremioGlossaryTerm] = deque() + self.queries: Deque[DremioQuery] = deque() + + self.datasets_populated = False + self.containers_populated = False + self.queries_populated = False + + def set_datasets(self) -> None: + if not self.datasets_populated: + for dataset_details in self.dremio_api.get_all_tables_and_columns(): + dremio_dataset = 
DremioDataset( + dataset_details=dataset_details, + api_operations=self.dremio_api, + ) + self.datasets.append(dremio_dataset) + + for glossary_term in dremio_dataset.glossary_terms: + if glossary_term not in self.glossary_terms: + self.glossary_terms.append(glossary_term) + + self.datasets_populated = True + + def force_reset_datasets(self) -> None: + self.datasets_populated = False + self.set_datasets() + + def get_datasets(self) -> Deque[DremioDataset]: + self.set_datasets() + return self.datasets + + def set_containers(self) -> None: + if not self.containers_populated: + for container in self.dremio_api.get_all_containers(): + container_type = container.get("container_type") + if container_type == "SOURCE": + self.sources.append( + DremioSource( + container_name=container.get("name"), + location_id=container.get("id"), + path=[], + api_operations=self.dremio_api, + dremio_source_type=container.get("source_type"), + root_path=container.get("root_path"), + database_name=container.get("database_name"), + ) + ) + elif container_type == "SPACE": + self.spaces.append( + DremioSpace( + container_name=container.get("name"), + location_id=container.get("id"), + path=[], + api_operations=self.dremio_api, + ) + ) + elif container_type == "FOLDER": + self.folders.append( + DremioFolder( + container_name=container.get("name"), + location_id=container.get("id"), + path=container.get("path"), + api_operations=self.dremio_api, + ) + ) + else: + self.spaces.append( + DremioSpace( + container_name=container.get("name"), + location_id=container.get("id"), + path=[], + api_operations=self.dremio_api, + ) + ) + + logging.info("Containers retrieved from source") + + self.containers_populated = True + + def force_reset_containers(self) -> None: + self.containers_populated = False + self.set_containers() + + def get_containers(self) -> Deque: + self.set_containers() + return deque(itertools.chain(self.sources, self.spaces, self.folders)) + + def get_sources(self) -> 
Deque[DremioSource]: + self.set_containers() + return self.sources + + def get_glossary_terms(self) -> Deque[DremioGlossaryTerm]: + self.set_datasets() + self.set_containers() + return self.glossary_terms + + def force_set_glossary_terms(self) -> None: + self.force_reset_containers() + self.force_reset_datasets() + + def is_valid_query(self, query: Dict[str, Any]) -> bool: + required_fields = [ + "job_id", + "user_name", + "submitted_ts", + "query", + "queried_datasets", + ] + return all(query.get(field) for field in required_fields) + + def get_queries(self) -> Deque[DremioQuery]: + for query in self.dremio_api.extract_all_queries(): + if not self.is_valid_query(query): + continue + self.queries.append( + DremioQuery( + job_id=query["job_id"], + username=query["user_name"], + submitted_ts=query["submitted_ts"], + query=query["query"], + queried_datasets=query["queried_datasets"], + ) + ) + self.queries_populated = True + return self.queries diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_profiling.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_profiling.py new file mode 100644 index 0000000000000..07e8a4ee0e7d0 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_profiling.py @@ -0,0 +1,189 @@ +import logging +import re +from typing import Any, Dict, List, Tuple + +from datahub.ingestion.source.dremio.dremio_api import DremioAPIOperations +from datahub.ingestion.source.dremio.dremio_config import ProfileConfig + +logger = logging.getLogger(__name__) + + +class DremioProfiler: + def __init__(self, api_operations: DremioAPIOperations, config: ProfileConfig): + self.api_operations = api_operations + self.config = config + self.MAX_COLUMNS_PER_QUERY = 800 + self.QUERY_TIMEOUT = config.query_timeout # 5 minutes timeout for each query + + def profile_table(self, table_name: str, columns: List[Tuple[str, str]]) -> Dict: + chunked_columns = self._chunk_columns(columns) + profile_results = [] + + 
for chunk in chunked_columns: + try: + chunk_result = self._profile_chunk(table_name, chunk) + profile_results.append(chunk_result) + except Exception as e: + logger.error(f"Error profiling chunk of {table_name}: {str(e)}") + profile_results.append(self._create_empty_profile_result(chunk)) + + return self._combine_profile_results(profile_results) + + def _profile_chunk(self, table_name: str, columns: List[Tuple[str, str]]) -> Dict: + profile_sql = self._build_profile_sql(table_name, columns) + try: + results = self.api_operations.execute_query(profile_sql) + return self._parse_profile_results(results, columns) + except Exception as e: + logger.error(f"Error profiling {table_name}: {str(e)}") + raise + + def _chunk_columns( + self, columns: List[Tuple[str, str]] + ) -> List[List[Tuple[str, str]]]: + return [ + columns[i : i + self.MAX_COLUMNS_PER_QUERY] + for i in range(0, len(columns), self.MAX_COLUMNS_PER_QUERY) + ] + + def _build_profile_sql( + self, table_name: str, columns: List[Tuple[str, str]] + ) -> str: + metrics = [] + + if self.config.row_count: + metrics.append("COUNT(*) AS row_count") + + if self.config.column_count: + metrics.append(f"{len(columns)} AS column_count") + + for column_name, data_type in columns: + try: + metrics.extend(self._get_column_metrics(column_name, data_type)) + except Exception as e: + logger.warning( + f"Error building metrics for column {column_name}: {str(e)}" + ) + # Skip this column and continue with others + + if not metrics: + raise ValueError("No valid metrics could be generated") + + main_query = f"SELECT {', '.join(metrics)} FROM {table_name}" + + return main_query + + def _get_column_metrics(self, column_name: str, data_type: str) -> List[str]: + metrics = [] + + # Wrap column name in quotes to handle special characters + quoted_column_name = f'"{column_name}"' + safe_column_name = re.sub(r"\W|^(?=\d)", "_", column_name) + + if self.config.include_field_distinct_count: + metrics.append( + f"COUNT(DISTINCT 
{quoted_column_name}) AS {safe_column_name}_distinct_count" + ) + + if self.config.include_field_null_count: + metrics.append( + f"SUM(CASE WHEN {quoted_column_name} IS NULL THEN 1 ELSE 0 END) AS {safe_column_name}_null_count" + ) + + if self.config.include_field_min_value: + metrics.append(f"MIN({quoted_column_name}) AS {safe_column_name}_min") + + if self.config.include_field_max_value: + metrics.append(f"MAX({quoted_column_name}) AS {safe_column_name}_max") + + if data_type.lower() in ["int", "bigint", "float", "double", "decimal"]: + if self.config.include_field_mean_value: + metrics.append( + f"AVG(CAST({quoted_column_name} AS DOUBLE)) AS {safe_column_name}_mean" + ) + + if self.config.include_field_stddev_value: + metrics.append( + f"STDDEV(CAST({quoted_column_name} AS DOUBLE)) AS {safe_column_name}_stdev" + ) + + if self.config.include_field_median_value: + metrics.append( + f"MEDIAN({quoted_column_name}) AS {safe_column_name}_median" + ) + + if self.config.include_field_quantiles: + metrics.append( + f"PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {quoted_column_name}) AS {safe_column_name}_25th_percentile" + ) + metrics.append( + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {quoted_column_name}) AS {safe_column_name}_75th_percentile" + ) + + return metrics + + def _parse_profile_results( + self, results: List[Dict], columns: List[Tuple[str, str]] + ) -> Dict: + profile: Dict[str, Any] = {"column_stats": {}} + result = results[0] if results else {} # We expect only one row of results + + if self.config.row_count: + profile["row_count"] = int(result.get("row_count", 0)) + + if self.config.column_count: + profile["column_count"] = int(result.get("column_count", 0)) + + for column_name, data_type in columns: + safe_column_name = re.sub(r"\W|^(?=\d)", "_", column_name) + column_stats: Dict[str, Any] = {} + if self.config.include_field_distinct_count: + column_stats["distinct_count"] = int( + result.get(f"{safe_column_name}_distinct_count", 0) + ) + if 
self.config.include_field_null_count: + column_stats["null_count"] = int( + result.get(f"{safe_column_name}_null_count", 0) + ) + if self.config.include_field_min_value: + column_stats["min"] = result.get(f"{safe_column_name}_min") + if self.config.include_field_max_value: + column_stats["max"] = result.get(f"{safe_column_name}_max") + + if data_type.lower() in ["int", "bigint", "float", "double", "decimal"]: + if self.config.include_field_mean_value: + column_stats["mean"] = result.get(f"{safe_column_name}_mean") + if self.config.include_field_stddev_value: + column_stats["stdev"] = result.get(f"{safe_column_name}_stdev") + if self.config.include_field_median_value: + column_stats["median"] = result.get(f"{safe_column_name}_median") + if self.config.include_field_quantiles: + column_stats["quantiles"] = [ + result.get(f"{safe_column_name}_25th_percentile"), + result.get(f"{safe_column_name}_75th_percentile"), + ] + + profile["column_stats"][column_name] = column_stats + + return profile + + def _create_empty_profile_result(self, columns: List[Tuple[str, str]]) -> Dict: + profile: Dict[str, Any] = {"column_stats": {}} + for column_name, _ in columns: + profile["column_stats"][column_name] = {} + return profile + + def _combine_profile_results(self, profile_results: List[Dict]) -> Dict: + combined_profile = {} + combined_profile["row_count"] = sum( + profile.get("row_count", 0) for profile in profile_results + ) + combined_profile["column_count"] = sum( + profile.get("column_count", 0) for profile in profile_results + ) + combined_profile["column_stats"] = {} + + for profile in profile_results: + combined_profile["column_stats"].update(profile.get("column_stats", {})) + + return combined_profile diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py new file mode 100644 index 0000000000000..cd5c91cbb5d1e --- /dev/null +++ 
b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py @@ -0,0 +1,608 @@ +"""This module contains the Dremio source class for DataHub ingestion""" +import logging +import re +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, Iterable, List, Optional + +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataset_urn_with_platform_instance, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.decorators import ( + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.source import SourceCapability, SourceReport +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.dremio.dremio_api import ( + DremioAPIOperations, + DremioEdition, +) +from datahub.ingestion.source.dremio.dremio_aspects import DremioAspects +from datahub.ingestion.source.dremio.dremio_config import ( + DremioSourceConfig, + DremioSourceMapping, +) +from datahub.ingestion.source.dremio.dremio_datahub_source_mapping import ( + DremioToDataHubSourceTypeMapping, +) +from datahub.ingestion.source.dremio.dremio_entities import ( + DremioCatalog, + DremioContainer, + DremioDataset, + DremioDatasetType, + DremioGlossaryTerm, + DremioQuery, +) +from datahub.ingestion.source.dremio.dremio_profiling import ( + DremioProfiler, + ProfileConfig, +) +from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, + StaleEntityRemovalSourceReport, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionSourceBase, +) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from 
datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + DatasetLineageTypeClass, + UpstreamClass, + UpstreamLineage, +) +from datahub.metadata.schema_classes import ChangeTypeClass, SchemaMetadataClass +from datahub.metadata.urns import CorpUserUrn +from datahub.sql_parsing.sql_parsing_aggregator import ( + KnownQueryLineageInfo, + ObservedQuery, + SqlParsingAggregator, +) + +logger = logging.getLogger(__name__) + + +@dataclass +class DremioSourceReport(StaleEntityRemovalSourceReport): + num_containers_failed: int = 0 + num_datasets_failed: int = 0 + + def report_upstream_latency(self, start_time: datetime, end_time: datetime) -> None: + # recording total combined latency is not very useful, keeping this method as a placeholder + # for future implementation of min / max / percentiles etc. + pass + + +@platform_name("Dremio") +@config_class(DremioSourceConfig) +@support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability(SourceCapability.OWNERSHIP, "Enabled by default") +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +class DremioSource(StatefulIngestionSourceBase): + """ + This plugin extracts the following: + - Metadata for databases, schemas, views and tables + - Column types associated with each table + - Table, row, and column statistics via optional SQL profiling + - Lineage information for views and datasets + """ + + config: DremioSourceConfig + report: DremioSourceReport + + def __init__(self, config: DremioSourceConfig, ctx: Any): + super().__init__(config, ctx) + self.config = config + self.report = DremioSourceReport() + self.source_map: Dict[str, DremioSourceMapping] = 
defaultdict() + + # Initialize API operations + dremio_api = DremioAPIOperations(self.config) + + # Initialize catalog + self.dremio_catalog = DremioCatalog(dremio_api) + + # Initialize profiler + profile_config = ProfileConfig() + self.profiler = DremioProfiler(dremio_api, profile_config) + + # Initialize aspects + self.dremio_aspects = DremioAspects( + platform=self.get_platform(), + profiler=self.profiler, + domain=self.config.domain, + platform_instance=self.config.platform_instance, + env=self.config.env, + profiling_enabled=self.config.profiling.enabled, + base_url=dremio_api.base_url, + ) + self.reference_source_mapping = DremioToDataHubSourceTypeMapping() + self.max_workers = config.max_workers + + # Handle stale entity removal + self.stale_entity_removal_handler = StaleEntityRemovalHandler( + source=self, + config=self.config, + state_type_class=GenericCheckpointState, + pipeline_name=self.ctx.pipeline_name, + run_id=self.ctx.run_id, + ) + + self.sql_parsing_aggregator = SqlParsingAggregator( + platform=make_data_platform_urn(self.get_platform()), + platform_instance=self.config.platform_instance, + env=self.config.env, + graph=self.ctx.graph, + generate_usage_statistics=True, + generate_operations=True, + usage_config=BaseUsageConfig(), + ) + + @classmethod + def create(cls, config_dict: Dict[str, Any], ctx: Any) -> "DremioSource": + config = DremioSourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_platform(self) -> str: + return "dremio" + + def _build_source_map(self) -> Dict[str, DremioSourceMapping]: + source_map = {} + dremio_sources = self.dremio_catalog.get_sources() + + for source in dremio_sources: + source_name = source.container_name + if isinstance(source.dremio_source_type, str): + source_type = source.dremio_source_type.lower() + root_path = source.root_path.lower() if source.root_path else "" + database_name = ( + source.database_name.lower() if source.database_name else "" + ) + source_present = False + 
source_platform_name = source_name + + for mapping in self.config.source_mappings or []: + if not mapping.platform_name: + continue + + if re.search(mapping.platform_name, source_type, re.IGNORECASE): + source_platform_name = mapping.platform_name.lower() + + if not mapping.platform: + continue + + datahub_source_type = ( + self.reference_source_mapping.get_datahub_source_type( + source_type + ) + ) + + if re.search(mapping.platform, datahub_source_type, re.IGNORECASE): + source_platform_name = source_platform_name.lower() + source_map[source_platform_name] = mapping + source_map[ + source_platform_name + ].dremio_source_type = self.reference_source_mapping.get_category( + source_type + ) + source_map[source_platform_name].root_path = root_path + source_map[source_platform_name].database_name = database_name + source_present = True + break + + if not source_present: + try: + dremio_source_type = self.reference_source_mapping.get_category( + source_type + ) + except Exception as exc: + logger.info( + f"Source {source_type} is not a standard Dremio source type. " + f"Adding source_type {source_type} to mapping as database. Error: {exc}" + ) + + self.reference_source_mapping.add_mapping( + source_type, source_name + ) + dremio_source_type = self.reference_source_mapping.get_category( + source_type + ) + + source_map[source_platform_name.lower()] = DremioSourceMapping( + platform=source_type, + platform_name=source_name, + dremio_source_type=dremio_source_type, + ) + + else: + logger.error( + f'Source "{source.container_name}" is broken. Containers will not be created for source.' + ) + logger.error( + f'No new cross-platform lineage will be emitted for source "{source.container_name}".' + ) + logger.error("Fix this source in Dremio to fix this issue.") + + return source_map + + def get_workunits(self) -> Iterable[MetadataWorkUnit]: + """ + Generate workunits for Dremio metadata. 
+ """ + + self.source_map = self._build_source_map() + + for wu in self.get_workunits_internal(): + self.report.report_workunit(wu) + yield wu + + # Emit the stale entity removal workunits + yield from self.stale_entity_removal_handler.gen_removed_entity_workunits() + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + """ + Internal method to generate workunits for Dremio metadata. + """ + + # Process Containers + containers = self.dremio_catalog.get_containers() + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_container = { + executor.submit(self.process_container, container): container + for container in containers + } + + for future in as_completed(future_to_container): + container_info = future_to_container[future] + try: + yield from future.result() + logger.info( + f"Dremio container {container_info.container_name} emitted successfully" + ) + except Exception as exc: + self.report.num_containers_failed = +1 + self.report.report_failure( + "Failed to process Dremio container", + f"Failed to process container {'.'.join(container_info.path)}.{container_info.resource_name}: {exc}", + ) + + # Process Datasets + datasets = self.dremio_catalog.get_datasets() + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_dataset = { + executor.submit(self.process_dataset, dataset): dataset + for dataset in datasets + } + + for future in as_completed(future_to_dataset): + dataset_info = future_to_dataset[future] + try: + yield from future.result() + logger.info( + f"Dremio dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name} emitted successfully" + ) + except Exception as exc: + self.report.num_datasets_failed = +1 + self.report.report_failure( + "Failed to process Dremio dataset", + f"Failed to process dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name}: {exc}", + ) + + # Optionally Process Query Lineage + if self.config.include_query_lineage: + 
self.get_query_lineage_workunits() + + # Process Glossary Terms + glossary_terms = self.dremio_catalog.get_glossary_terms() + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_glossary_term = { + executor.submit( + self.process_glossary_term, glossary_term + ): glossary_term + for glossary_term in glossary_terms + } + + for future in as_completed(future_to_glossary_term): + glossary_term_info = future_to_glossary_term[future] + try: + yield from future.result() + except Exception as exc: + self.report.report_failure( + "Failed to process Glossary terms", + f"Failed to process glossary term {glossary_term_info.glossary_term}: {exc}", + ) + + # Generate workunit for aggregated SQL parsing results + for mcp in self.sql_parsing_aggregator.gen_metadata(): + self.report.report_workunit(mcp.as_workunit()) + yield mcp.as_workunit() + + def process_container( + self, container_info: DremioContainer + ) -> Iterable[MetadataWorkUnit]: + """ + Process a Dremio container and generate metadata workunits. + """ + container_urn = self.dremio_aspects.get_container_urn( + path=container_info.path, name=container_info.container_name + ) + self.stale_entity_removal_handler.add_entity_to_state( + type="container", urn=container_urn + ) + yield from self.dremio_aspects.populate_container_mcp( + container_urn, container_info + ) + + def process_dataset( + self, dataset_info: DremioDataset + ) -> Iterable[MetadataWorkUnit]: + """ + Process a Dremio dataset and generate metadata workunits. 
+ """ + + schema_str = ".".join(dataset_info.path) + + dataset_urn = make_dataset_urn_with_platform_instance( + platform=make_data_platform_urn(self.get_platform()), + name=f"dremio.{schema_str}.{dataset_info.resource_name}".lower(), + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + + # Mark the entity as scanned + self.stale_entity_removal_handler.add_entity_to_state( + type="dataset", + urn=dataset_urn, + ) + + for dremio_mcp in self.dremio_aspects.populate_dataset_mcp( + dataset_urn, dataset_info + ): + yield dremio_mcp + # Check if the emitted aspect is SchemaMetadataClass + if isinstance(dremio_mcp.metadata, SchemaMetadataClass): + self.sql_parsing_aggregator.register_schema( + urn=dataset_urn, + schema=dremio_mcp.metadata, + ) + + if dataset_info.dataset_type == DremioDatasetType.VIEW: + if ( + self.dremio_catalog.edition == DremioEdition.ENTERPRISE + and dataset_info.parents + ): + yield from self.generate_view_lineage( + parents=dataset_info.parents, + dataset_urn=dataset_urn, + ) + + if dataset_info.sql_definition: + self.sql_parsing_aggregator.add_view_definition( + view_urn=dataset_urn, + view_definition=dataset_info.sql_definition, + default_db="dremio", + ) + + elif dataset_info.dataset_type == DremioDatasetType.TABLE: + dremio_source = dataset_info.path[0] if dataset_info.path else None + + if dremio_source: + upstream_urn = self._map_dremio_dataset_to_urn( + dremio_source=dremio_source, + dremio_path=dataset_info.path, + dremio_dataset=dataset_info.resource_name, + ) + + if upstream_urn: + upstream_lineage = UpstreamLineage( + upstreams=[ + UpstreamClass( + dataset=upstream_urn, + type=DatasetLineageTypeClass.COPY, + ) + ] + ) + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=upstream_lineage, + ) + yield mcp.as_workunit() + self.sql_parsing_aggregator.add_known_lineage_mapping( + upstream_urn=upstream_urn, + downstream_urn=dataset_urn, + lineage_type=DatasetLineageTypeClass.COPY, + ) + + def 
process_glossary_term( + self, glossary_term_info: DremioGlossaryTerm + ) -> Iterable[MetadataWorkUnit]: + """ + Process a Dremio container and generate metadata workunits. + """ + + glossary_term_urn = glossary_term_info.urn + + self.stale_entity_removal_handler.add_entity_to_state( + type="glossaryTerm", urn=glossary_term_urn + ) + + yield from self.dremio_aspects.populate_glossary_term_mcp(glossary_term_info) + + def generate_view_lineage( + self, dataset_urn: str, parents: List[str] + ) -> Iterable[MetadataWorkUnit]: + """ + Generate lineage information for views. + """ + upstream_urns = [ + make_dataset_urn_with_platform_instance( + platform=make_data_platform_urn(self.get_platform()), + name=f"dremio.{upstream_table.lower()}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + for upstream_table in parents + ] + + lineage = UpstreamLineage( + upstreams=[ + UpstreamClass( + dataset=upstream_urn, + type=DatasetLineageTypeClass.VIEW, + ) + for upstream_urn in upstream_urns + ] + ) + mcp = MetadataChangeProposalWrapper( + entityType="dataset", + entityUrn=dataset_urn, + aspectName=lineage.ASPECT_NAME, + aspect=lineage, + changeType=ChangeTypeClass.UPSERT, + ) + + for upstream_urn in upstream_urns: + self.sql_parsing_aggregator.add_known_lineage_mapping( + upstream_urn=upstream_urn, + downstream_urn=dataset_urn, + lineage_type=DatasetLineageTypeClass.VIEW, + ) + + yield MetadataWorkUnit(id=f"{dataset_urn}-upstreamLineage", mcp=mcp) + + def get_query_lineage_workunits(self) -> None: + """ + Process query lineage information. 
+ """ + + queries = self.dremio_catalog.get_queries() + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_query = { + executor.submit(self.process_query, query): query for query in queries + } + + for future in as_completed(future_to_query): + query = future_to_query[future] + try: + future.result() + except Exception as exc: + self.report.report_failure( + "Failed to process dremio query", + f"Failed to process query {query.job_id}: {exc}", + ) + + def process_query(self, query: DremioQuery) -> None: + """ + Process a single Dremio query for lineage information. + """ + + if query.query and query.affected_dataset: + upstream_urns = [ + make_dataset_urn_with_platform_instance( + platform=make_data_platform_urn(self.get_platform()), + name=f"dremio.{ds.lower()}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + for ds in query.queried_datasets + ] + + downstream_urn = make_dataset_urn_with_platform_instance( + platform=make_data_platform_urn(self.get_platform()), + name=f"dremio.{query.affected_dataset.lower()}", + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + + # Add query to SqlParsingAggregator + self.sql_parsing_aggregator.add_known_query_lineage( + KnownQueryLineageInfo( + query_text=query.query, + upstreams=upstream_urns, + downstream=downstream_urn, + ) + ) + + # Add observed query + self.sql_parsing_aggregator.add_observed_query( + ObservedQuery( + query=query.query, + timestamp=query.submitted_ts, + user=CorpUserUrn(username=query.username), + default_db="dremio", + ) + ) + + def _map_dremio_dataset_to_urn( + self, + dremio_source: str, + dremio_path: List[str], + dremio_dataset: str, + ) -> Optional[str]: + """ + Map a Dremio dataset to a DataHub URN. 
+ """ + + mapping = self.source_map.get(dremio_source.lower()) + if not mapping: + return None + + if not mapping.platform: + return None + + root_path = "" + database_name = "" + + if mapping.dremio_source_type == "file_object_storage": + if mapping.root_path: + root_path = f"{mapping.root_path[1:]}/" + dremio_dataset = f"{root_path}{'/'.join(dremio_path[1:])}/{dremio_dataset}" + else: + if mapping.database_name: + database_name = f"{mapping.database_name}." + dremio_dataset = ( + f"{database_name}{'.'.join(dremio_path[1:])}.{dremio_dataset}" + ) + + if mapping.platform_instance: + return make_dataset_urn_with_platform_instance( + platform=mapping.platform.lower(), + name=dremio_dataset, + platform_instance=mapping.platform_instance, + env=self.config.env, + ) + + return make_dataset_urn_with_platform_instance( + platform=mapping.platform.lower(), + name=dremio_dataset, + platform_instance=None, + env=self.config.env, + ) + + def get_report(self) -> SourceReport: + """ + Get the source report. + """ + return self.report + + def close(self) -> None: + """ + Close any resources held by the source. 
+ """ + pass diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py new file mode 100644 index 0000000000000..281a5df82d326 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py @@ -0,0 +1,334 @@ +class DremioSQLQueries: + QUERY_DATASETS_CE = """ + SELECT* FROM + ( + SELECT + T.TABLE_SCHEMA, + T.TABLE_NAME, + CONCAT(T.TABLE_SCHEMA, '.', T.TABLE_NAME) AS FULL_TABLE_PATH, + V.VIEW_DEFINITION, + C.COLUMN_NAME, + C.IS_NULLABLE, + C.DATA_TYPE, + C.COLUMN_SIZE + FROM + INFORMATION_SCHEMA."TABLES" T + LEFT JOININFORMATION_SCHEMA.VIEWS V ON + V.TABLE_CATALOG = T.TABLE_CATALOG + AND V.TABLE_SCHEMA = T.TABLE_SCHEMA + AND V.TABLE_NAME = T.TABLE_NAME + INNER JOININFORMATION_SCHEMA.COLUMNS C ON + C.TABLE_CATALOG = T.TABLE_CATALOG + AND C.TABLE_SCHEMA = T.TABLE_SCHEMA + AND C.TABLE_NAME = T.TABLE_NAME + WHERE + T.TABLE_TYPE NOT IN ('SYSTEM_TABLE') + ) + WHERE 1=1 + {schema_pattern} + {table_pattern} + {deny_schema_pattern} + {deny_table_pattern} + ORDER BY + TABLE_SCHEMA ASC, + TABLE_NAME ASC + """ + + QUERY_DATASETS_EE = """ + SELECT* FROM + ( + SELECT + RESOURCE_ID, + V.TABLE_NAME, + OWNER, + PATH AS TABLE_SCHEMA, + CONCAT(REPLACE(REPLACE( + REPLACE(V.PATH, ', ', '.'), + '[', ''), ']', '' + )) AS FULL_TABLE_PATH, + OWNER_TYPE, + LOCATION_ID, + VIEW_DEFINITION, + FORMAT_TYPE, + COLUMN_NAME, + ORDINAL_POSITION, + IS_NULLABLE, + DATA_TYPE, + COLUMN_SIZE, + CREATED + FROM + (SELECT + VIEW_ID AS RESOURCE_ID, + VIEW_NAME AS TABLE_NAME, + PATH, + CASE + WHEN LENGTH(SCHEMA_ID) = 0 THEN SPACE_ID + ELSE SCHEMA_ID + END AS LOCATION_ID, + OWNER_ID, + SQL_DEFINITION AS VIEW_DEFINITION, + '' AS FORMAT_TYPE, + CREATED, + TYPE + FROM + SYS.VIEWS + UNION ALL + SELECT + TABLE_ID AS RESOURCE_ID, + TABLE_NAME, + PATH, + CASE + WHEN LENGTH(SCHEMA_ID) = 0 THEN SOURCE_ID + ELSE SCHEMA_ID + END AS LOCATION_ID, + OWNER_ID, + NULL AS 
VIEW_DEFINITION, + FORMAT_TYPE, + CREATED, + TYPE + FROM + SYS."TABLES" + ) V + LEFT JOIN + (SELECT + USER_ID AS ID, + USER_NAME AS "OWNER", + 'USER' AS OWNER_TYPE + FROM + SYS.USERS + UNION ALL + SELECT + ROLE_ID AS ID, + ROLE_NAME AS "OWNER", + 'GROUP' AS OWNER_TYPE + FROM + SYS.ROLES + ) U + ON + V.OWNER_ID = U.ID + LEFT JOIN + (SELECT + TABLE_SCHEMA, + TABLE_NAME, + COLUMN_NAME, + ORDINAL_POSITION, + IS_NULLABLE, + DATA_TYPE, + COLUMN_SIZE + FROM + INFORMATION_SCHEMA.COLUMNS + ) C + ON + CONCAT(REPLACE(REPLACE(REPLACE(V.PATH, ', ', '.'), '[', ''), ']', '')) = + CONCAT(C.TABLE_SCHEMA, '.', C.TABLE_NAME) + WHERE + V.TYPE NOT IN ('SYSTEM_TABLE') + ) + WHERE 1=1 + {schema_pattern} + {table_pattern} + {deny_schema_pattern} + {deny_table_pattern} + ORDER BY + TABLE_SCHEMA ASC, + TABLE_NAME ASC + """ + + QUERY_DATASETS_CLOUD = """ + SELECT* FROM + ( + SELECT + RESOURCE_ID, + V.TABLE_NAME, + OWNER, + PATH AS TABLE_SCHEMA, + CONCAT(REPLACE(REPLACE( + CONCAT(REPLACE(REPLACE( + REPLACE(V.PATH, ', ', '.'), + '[', ''), ']', '' + )) AS FULL_TABLE_PATH, + OWNER_TYPE, + LOCATION_ID, + VIEW_DEFINITION, + FORMAT_TYPE, + COLUMN_NAME, + ORDINAL_POSITION, + IS_NULLABLE, + DATA_TYPE, + COLUMN_SIZE, + CREATED + FROM + (SELECT + VIEW_ID AS RESOURCE_ID, + VIEW_NAME AS TABLE_NAME, + PATH, + CASE + WHEN LENGTH(SCHEMA_ID) = 0 THEN SPACE_ID + ELSE SCHEMA_ID + END AS LOCATION_ID, + OWNER_ID, + SQL_DEFINITION AS VIEW_DEFINITION, + '' AS FORMAT_TYPE, + CREATED, + TYPE + FROM + SYS.PROJECT.VIEWS + UNION ALL + SELECT + TABLE_ID AS RESOURCE_ID, + TABLE_NAME, + PATH, + CASE + WHEN LENGTH(SCHEMA_ID) = 0 THEN SOURCE_ID + ELSE SCHEMA_ID + END AS LOCATION_ID, + OWNER_ID, + NULL AS VIEW_DEFINITION, + FORMAT_TYPE, + CREATED, + TYPE + FROM + SYS.PROJECT."TABLES" + ) V + LEFT JOIN + (SELECT + USER_ID AS ID, + USER_NAME AS "OWNER", + 'USER' AS OWNER_TYPE + FROM + SYS.ORGANIZATION.USERS + UNION ALL + SELECT + ROLE_ID AS ID, + ROLE_NAME AS "OWNER", + 'GROUP' AS OWNER_TYPE + FROM + SYS.ORGANIZATION.ROLES + ) 
U + ON + V.OWNER_ID = U.ID + LEFT JOIN + (SELECT + TABLE_SCHEMA, + TABLE_NAME, + COLUMN_NAME, + ORDINAL_POSITION, + IS_NULLABLE, + DATA_TYPE, + COLUMN_SIZE + FROM + INFORMATION_SCHEMA.COLUMNS + ) C + ON + CONCAT(REPLACE(REPLACE(REPLACE(V.PATH, ', ', '.'), '[', ''), ']', '')) = + CONCAT(C.TABLE_SCHEMA, '.', C.TABLE_NAME) + WHERE + V.TYPE NOT IN ('SYSTEM_TABLE') + ) + WHERE 1=1 + {schema_pattern} + {table_pattern} + {deny_schema_pattern} + {deny_table_pattern} + ORDER BY + TABLE_SCHEMA ASC, + TABLE_NAME ASC + """ + + QUERY_ALL_JOBS = """ + SELECT + * + FROM + SYS.JOBS_RECENT + WHERE + STATUS = 'COMPLETED' + AND LENGTH(queried_datasets)>0 + AND user_name != '$dremio$' + AND query_type not like '%INTERNAL%' + """ + + QUERY_ALL_JOBS_CLOUD = """ + SELECT + * + FROM + SYS.PROJECT.HISTORY.JOBS + WHERE + STATUS = 'COMPLETED' + AND LENGTH(queried_datasets)>0 + AND user_name != '$dremio$' + AND query_type not like '%INTERNAL%' + """ + + QUERY_TYPES = [ + "ALTER TABLE", + "ALTER VIEW", + "COPY INTO", + "CREATE TABLE", + "CREATE VIEW", + "DROP TABLE", + "DROP VIEW", + "SELECT", + "WITH", + ] + + PROFILE_COLUMNS = """ + SELECT + {profile_queries} + FROM( + SELECT + * + FROM + {dremio_dataset} + LIMIT {sample_limit} + ) + """ + + DISTINCT_COUNT_VALUE = """ + COUNT(DISTINCT {column}) AS {column}_max_value + """ + + # DISTINCT_COUNT_FREQUENCIES = """ + # (SELECTSTRING_AGG(CAST(COUNT(*) AS VARCHAR) || ':' || CAST({column} AS VARCHAR), ',') + # FROM(SELECT{column} FROM{dremio_dataset} GROUP BY {column}) + # GROUP BY {column} + # ORDER BY COUNT(*) DESC + # ) as {column}_value_frequencies + # """ + + HISTOGRAM_VALUES = """ + (SELECTSTRING_AGG(CAST(COUNT(*) AS VARCHAR) || ':' || CAST({column} AS VARCHAR), ',') + FROM(SELECT{column} FROM{dremio_dataset} GROUP BY {column}) + GROUP BY {column} + ORDER BY COUNT(*) DESC + ) as {column}_value_frequencies + """ + + MAX_VALUE = """ + MAX({column}) AS {column}_max_value + """ + + MIN_VALUE = """ + MIN({column}) AS {column}_min_value + """ + + 
MEAN_VALUE = """ + AVG({column}) AS {column}_mean_value + """ + + MEDIAN_VALUE = """ + MEDIAN({column}) AS {column}_median_value + """ + + NULL_COUNT_VALUE = """ + SUM(CASEWHEN {column} IS NULL THEN 1 ELSE 0 END) as {column}_null_count + """ + + QUANTILES_VALUE = """ + PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {column}) as {column}_25th_percentile, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY {column}) as {column}_50th_percentile, + PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {column}) as {column}_75th_percentile + """ + + STDDEV_VALUE = """ + STDDEV({column}) as {column}_stddev_value + """ diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml index 24d5da22805cb..f480ec862bc4e 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml @@ -707,3 +707,13 @@ displayName: SAP Analytics Cloud type: OTHERS logoUrl: "/assets/platforms/saclogo.svg" +- entityUrn: urn:li:dataPlatform:dremio + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." 
+ name: dremio + displayName: Dremio + type: QUERY_ENGINE + logoUrl: "/assets/platforms/dremiologo.png" From 703a3f88abf618514e093a62ee29153898e0e398 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Fri, 11 Oct 2024 18:48:47 +0530 Subject: [PATCH 44/50] refactor: improvement to metadata gathering --- .../ingestion/source/dremio/dremio_api.py | 37 +++++++++---- .../source/dremio/dremio_entities.py | 8 ++- .../source/dremio/dremio_sql_queries.py | 53 ++----------------- 3 files changed, 36 insertions(+), 62 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index 32440033843ed..da71900a72ce2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -6,7 +6,7 @@ from enum import Enum from itertools import product from time import sleep, time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Deque, Dict, List, Optional, Union from urllib.parse import quote import requests @@ -33,7 +33,7 @@ class DremioEdition(Enum): class DremioAPIOperations: _retry_count: int = 5 - _timeout: int = 10 + _timeout: int = 1800 def __init__(self, connection_args: "DremioSourceConfig") -> None: self.dremio_to_datahub_source_mapper = DremioToDataHubSourceTypeMapping() @@ -221,7 +221,7 @@ def fetch_results(self, job_id: str) -> List[Dict]: elif status["jobState"] == "CANCELED": raise RuntimeError("Query was canceled") - if time() - start_time > 300: # 5 minutes timeout + if time() - start_time > self._timeout: self.cancel_query(job_id) raise TimeoutError("Query execution timed out while fetching results") @@ -367,7 +367,7 @@ def community_get_formatted_tables( return dataset_list - def get_all_tables_and_columns(self) -> List[Dict]: + def get_all_tables_and_columns(self, containers: Deque) -> List[Dict]: if self.edition == 
DremioEdition.ENTERPRISE: query_template = DremioSQLQueries.QUERY_DATASETS_EE elif self.edition == DremioEdition.CLOUD: @@ -409,14 +409,29 @@ def get_pattern_condition( self.deny_dataset_pattern, table_field, allow=False ) - formatted_query = query_template.format( - schema_pattern=schema_condition, - table_pattern=table_condition, - deny_schema_pattern=deny_schema_condition, - deny_table_pattern=deny_table_condition, - ) + all_tables_and_columns = [] + + for schema in containers: + try: + formatted_query = query_template.format( + schema_pattern=schema_condition, + table_pattern=table_condition, + deny_schema_pattern=deny_schema_condition, + deny_table_pattern=deny_table_condition, + container_name=schema.container_name.lower(), + ) + + all_tables_and_columns.extend( + self.execute_query( + query=formatted_query, + ) + ) + except Exception as exc: + logger.warning( + f"{schema.subclass} {schema.container_name} had no tables or views" + ) + logger.debug(exc) - all_tables_and_columns = self.execute_query(formatted_query) tables = [] if self.edition == DremioEdition.COMMUNITY: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py index f09055fe2fbc7..004020e0ea13d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py @@ -335,7 +335,13 @@ def __init__(self, dremio_api: DremioAPIOperations): def set_datasets(self) -> None: if not self.datasets_populated: - for dataset_details in self.dremio_api.get_all_tables_and_columns(): + containers: Deque[DremioContainer] = deque() + containers.extend(self.spaces) # Add DremioSpace elements + containers.extend(self.sources) # Add DremioSource elements + + for dataset_details in self.dremio_api.get_all_tables_and_columns( + containers=containers + ): dremio_dataset = DremioDataset( dataset_details=dataset_details, 
api_operations=self.dremio_api, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py index 281a5df82d326..f795a5449ff99 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py @@ -23,6 +23,7 @@ class DremioSQLQueries: AND C.TABLE_NAME = T.TABLE_NAME WHERE T.TABLE_TYPE NOT IN ('SYSTEM_TABLE') + AND LOCATE('{container_name}', LOWER(T.TABLE_SCHEMA)) = 1 ) WHERE 1=1 {schema_pattern} @@ -123,6 +124,7 @@ class DremioSQLQueries: CONCAT(C.TABLE_SCHEMA, '.', C.TABLE_NAME) WHERE V.TYPE NOT IN ('SYSTEM_TABLE') + AND LOCATE('{container_name}', LOWER(PATH)) = 2 ) WHERE 1=1 {schema_pattern} @@ -224,6 +226,7 @@ class DremioSQLQueries: CONCAT(C.TABLE_SCHEMA, '.', C.TABLE_NAME) WHERE V.TYPE NOT IN ('SYSTEM_TABLE') + AND LOCATE('{container_name}', LOWER(PATH)) = 2 ) WHERE 1=1 {schema_pattern} @@ -282,53 +285,3 @@ class DremioSQLQueries: LIMIT {sample_limit} ) """ - - DISTINCT_COUNT_VALUE = """ - COUNT(DISTINCT {column}) AS {column}_max_value - """ - - # DISTINCT_COUNT_FREQUENCIES = """ - # (SELECTSTRING_AGG(CAST(COUNT(*) AS VARCHAR) || ':' || CAST({column} AS VARCHAR), ',') - # FROM(SELECT{column} FROM{dremio_dataset} GROUP BY {column}) - # GROUP BY {column} - # ORDER BY COUNT(*) DESC - # ) as {column}_value_frequencies - # """ - - HISTOGRAM_VALUES = """ - (SELECTSTRING_AGG(CAST(COUNT(*) AS VARCHAR) || ':' || CAST({column} AS VARCHAR), ',') - FROM(SELECT{column} FROM{dremio_dataset} GROUP BY {column}) - GROUP BY {column} - ORDER BY COUNT(*) DESC - ) as {column}_value_frequencies - """ - - MAX_VALUE = """ - MAX({column}) AS {column}_max_value - """ - - MIN_VALUE = """ - MIN({column}) AS {column}_min_value - """ - - MEAN_VALUE = """ - AVG({column}) AS {column}_mean_value - """ - - MEDIAN_VALUE = """ - MEDIAN({column}) AS {column}_median_value - """ - - 
NULL_COUNT_VALUE = """ - SUM(CASEWHEN {column} IS NULL THEN 1 ELSE 0 END) as {column}_null_count - """ - - QUANTILES_VALUE = """ - PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {column}) as {column}_25th_percentile, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY {column}) as {column}_50th_percentile, - PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {column}) as {column}_75th_percentile - """ - - STDDEV_VALUE = """ - STDDEV({column}) as {column}_stddev_value - """ From 4283cb66a6b309fbf15f401c347dadd83986c119 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Fri, 11 Oct 2024 18:50:32 +0530 Subject: [PATCH 45/50] fix: Update dremio_entity.py --- .../src/datahub/ingestion/source/dremio/dremio_entities.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py index 004020e0ea13d..895d6cbc0281c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py @@ -335,6 +335,8 @@ def __init__(self, dremio_api: DremioAPIOperations): def set_datasets(self) -> None: if not self.datasets_populated: + self.set_containers() + containers: Deque[DremioContainer] = deque() containers.extend(self.spaces) # Add DremioSpace elements containers.extend(self.sources) # Add DremioSource elements From f897b9e60fc105009ca7561b277f9b24afb27e1f Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Fri, 11 Oct 2024 18:52:14 +0530 Subject: [PATCH 46/50] fix: Update dremio_sql_queries.py --- .../src/datahub/ingestion/source/dremio/dremio_sql_queries.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py index f795a5449ff99..0d46d0ad357b3 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py @@ -118,6 +118,8 @@ class DremioSQLQueries: COLUMN_SIZE FROM INFORMATION_SCHEMA.COLUMNS + WHERE + LOCATE('{container_name}', LOWER(TABLE_SCHEMA)) = 1 ) C ON CONCAT(REPLACE(REPLACE(REPLACE(V.PATH, ', ', '.'), '[', ''), ']', '')) = @@ -220,6 +222,8 @@ class DremioSQLQueries: COLUMN_SIZE FROM INFORMATION_SCHEMA.COLUMNS + WHERE + LOCATE('{container_name}', LOWER(TABLE_SCHEMA)) = 1 ) C ON CONCAT(REPLACE(REPLACE(REPLACE(V.PATH, ', ', '.'), '[', ''), ']', '')) = From ac56777bf7ca661760465afabbcd6b94cae3ea72 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Fri, 11 Oct 2024 18:54:35 +0530 Subject: [PATCH 47/50] refactor: Update dremio_source.py --- .../src/datahub/ingestion/source/dremio/dremio_source.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py index cd5c91cbb5d1e..ddac5bf3a3e30 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py @@ -7,6 +7,7 @@ from datetime import datetime from typing import Any, Dict, Iterable, List, Optional +import datahub.sql_parsing.sqlglot_utils from datahub.emitter.mce_builder import ( make_data_platform_urn, make_dataset_urn_with_platform_instance, @@ -606,3 +607,10 @@ def close(self) -> None: Close any resources held by the source. 
""" pass + + +def _sql_dialect(platform: str) -> str: + return "trino" + + +datahub.sql_parsing.sqlglot_utils._get_dialect_str = _sql_dialect From 3ce57b912f5389e864514bb829a7b8ce8040bc95 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Thu, 17 Oct 2024 16:48:31 +0530 Subject: [PATCH 48/50] test: add integration test for dremio --- .../integration/dremio/docker-compose.yml | 35 + .../dremio/dremio_mces_golden.json | 1059 +++++++++++++++++ .../integration/dremio/dremio_to_file.yml | 27 + .../integration/dremio/setup_dremio_admin.sh | 34 + .../dremio/test_data/sample.parquet | Bin 0 -> 3090 bytes .../tests/integration/dremio/test_dremio.py | 345 ++++++ 6 files changed, 1500 insertions(+) create mode 100644 metadata-ingestion/tests/integration/dremio/docker-compose.yml create mode 100644 metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json create mode 100644 metadata-ingestion/tests/integration/dremio/dremio_to_file.yml create mode 100755 metadata-ingestion/tests/integration/dremio/setup_dremio_admin.sh create mode 100644 metadata-ingestion/tests/integration/dremio/test_data/sample.parquet create mode 100644 metadata-ingestion/tests/integration/dremio/test_dremio.py diff --git a/metadata-ingestion/tests/integration/dremio/docker-compose.yml b/metadata-ingestion/tests/integration/dremio/docker-compose.yml new file mode 100644 index 0000000000000..b60f894a13e0e --- /dev/null +++ b/metadata-ingestion/tests/integration/dremio/docker-compose.yml @@ -0,0 +1,35 @@ +version: "3" + +services: + # Minio Storage Server + minio: + image: minio/minio:RELEASE.2023-07-21T21-12-44Z + container_name: minio + environment: + - MINIO_ROOT_USER=miniouser + - MINIO_ROOT_PASSWORD=miniopassword + - MINIO_DOMAIN=storage + - MINIO_REGION_NAME=us-east-1 + - MINIO_REGION=us-east-1 + networks: + dremio-network: + ports: + - 9001:9001 + - 9000:9000 + command: ["server", "/data", "--console-address", ":9001"] + # Dremio + dremio: + platform: linux/x86_64 + image: 
dremio/dremio-oss:latest + ports: + - 9047:9047 + - 31010:31010 + - 32010:32010 + container_name: dremio + networks: + dremio-network: + depends_on: + - minio + +networks: + dremio-network: diff --git a/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json new file mode 100644 index 0000000000000..b6a03761fa4c5 --- /dev/null +++ b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json @@ -0,0 +1,1059 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "my_space", + "qualifiedName": "my_space", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "@admin", + "qualifiedName": "@admin", + "description": "# Wikis & Labels\n\n![Gnarly Catalog](https://d33wubrfki0l68.cloudfront.net/c1a54376c45a9276c080f3d10ed25ce61c17bcd2/2b946/img/home/open-source-for-everyone.svg)\n\nYou are reading the wiki for your home space! 
You can create and edit this information for any source, space, or folder.\n\nThis sidebar always shows the wiki for the current source, space or folder you are browsing.\n\nWhen browsing or previewing datasets, click on the `Open details panel` button to create a wiki or add labels to that dataset.\n\n**Tip:** You can hide the wiki by clicking on the sidebar icon on upper right hand side.", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Space" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Space" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + 
"aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "catalog_page", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:3acf5b43ce20cbc5b60abbdb57bb2a1a" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:c370d9075a8f1b3a28dbc3603717d466" + }, + { + "id": "tpcds_sf1000", + "urn": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "1ab266d5-18eb-4780-711d-0fa337fa6c00", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:be5a17b95b6903826193185a545fdb38" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:cd673c4bfa428a6c3e4157eed8f4efdd" + }, + { + "id": "tpcds_sf1000", + "urn": 
"urn:li:container:ad2959fe35f5f1baa446ebb3a9a90d27" + }, + { + "id": "catalog_page", + "urn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:201af44510e34d8245f5515ee71a751f" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "Dremio University", + "qualifiedName": "Samples.samples.dremio.com.Dremio University", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:64ce2a7c1fe5b4487868e3cc993817d8" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:973eabd34940a8012ff93f357510bc1d" + } + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "s3", + "qualifiedName": "s3", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "samples.dremio.com", + "qualifiedName": 
"Samples.samples.dremio.com", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "my_folder", + "qualifiedName": "my_space.my_folder", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "my_space", + "urn": "urn:li:container:19724f46e67c95babcd35b49cd200397" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5492d77b2954324ad07fff10d741e2d1" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "Samples", + "qualifiedName": "Samples", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:63a316133b08a091e919dc8c7a828a4d" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" 
+ } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { 
+ "json": { + "customProperties": {}, + "name": "warehouse", + "qualifiedName": "s3.warehouse", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "s3", + "urn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + 
"aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "tpcds_sf1000", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000", + "description": "", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:ef36cbff27a47ed33a76d0232fc3295c" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:201af44510e34d8245f5515ee71a751f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dremio/dremio_to_file.yml b/metadata-ingestion/tests/integration/dremio/dremio_to_file.yml new file mode 100644 index 0000000000000..742b97f9d79c9 --- /dev/null +++ b/metadata-ingestion/tests/integration/dremio/dremio_to_file.yml @@ -0,0 +1,27 @@ +source: + type: dremio + config: + # Coordinates + hostname: localhost + port: 9047 + tls: false + + # Credentials + authentication_method: password + username: admin + password: "2310Admin1234!@" + + include_query_lineage: false + + source_mappings: + - platform: s3 + platform_name: samples + + schema_pattern: + allow: + - ".*" + +sink: + type: file + config: + filename: "./dremio_mces.json" diff --git 
a/metadata-ingestion/tests/integration/dremio/setup_dremio_admin.sh b/metadata-ingestion/tests/integration/dremio/setup_dremio_admin.sh new file mode 100755 index 0000000000000..d4b7099ba25cc --- /dev/null +++ b/metadata-ingestion/tests/integration/dremio/setup_dremio_admin.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Set variables +DREMIO_URL="http://localhost:9047" +ADMIN_USER="admin" +ADMIN_PASSWORD="2310Admin1234!@" +ADMIN_FIRST_NAME="Admin" +ADMIN_LAST_NAME="User" +ADMIN_EMAIL="admin@dremio.com" + +# Wait for Dremio to become available +until $(curl --output /dev/null --silent --head --fail "$DREMIO_URL"); do + echo "Waiting for Dremio to start..." + sleep 5 +done + +# Create admin user +echo "Creating Dremio Admin User..." +RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -X PUT "$DREMIO_URL/apiv2/bootstrap/firstuser" \ + -H "Content-Type: application/json" \ + -d "{ + \"userName\": \"$ADMIN_USER\", + \"firstName\": \"$ADMIN_FIRST_NAME\", + \"lastName\": \"$ADMIN_LAST_NAME\", + \"email\": \"$ADMIN_EMAIL\", + \"password\": \"$ADMIN_PASSWORD\" + }") + +if [ $RESPONSE -eq 200 ]; then + echo "Admin user created successfully!" +else + echo "Failed to create admin user. 
HTTP response: $RESPONSE" + exit 1 +fi diff --git a/metadata-ingestion/tests/integration/dremio/test_data/sample.parquet b/metadata-ingestion/tests/integration/dremio/test_data/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9e7073d27b720a37ee8ef77c6bc22ad5fde9a772 GIT binary patch literal 3090 zcmcInOK&1a5NA2Q0NKcymtpYCFv}ZT zE9I0^lwXi@PC4WknHV;o}{FWSC?3e0yN zI$>rU&b8lA=r4z#b@}33$m8Rjn!-B0jQ7Uj{lgJ@5n|rNu5Sn2N30XUv;_-7aL%y# zNjML`b^{ITnyiv&k}YW+mQ+J*>zYbfhhiV_7XsTy+iuF>+*B3h=G_0&FU zP-y@^I=nwSLf^PT->vzVEZphnT;6>o?;nu7e+YPFY}*ToZ6WUy4#)V$G9>TIRAA0I z{_FZQ?`vY+_S;VUFP_jpZb<#Ikrk#_MD)od^#SJOC**dxzPNEiOxAmLg?xEnugft_ zI2_TVg8(m67i0SB-Dymoj;M?6(WIVk9HGBALf>vcSj%=+SidVQ--|fnJRP~A8Y0U; zi9K?XvPJABY-j;ILU}(pMmZ$0_HOJB@-jZZFMpp^ANpTPGIYQ75xV!Q^1k+XnGgfZ z1B84#oW~MPMeJ}txUPemsi?zqxuy3EvlDz0`0VqYK+ry(Q=Ym&rN`?a>2DAZYDy3k zE;LnF&gW@1$a!~U*@9+wKSpHDo?(ra$%fh$6|pOVQM0G(uXX}c5Ndb{WY19{ zQq*Ormef?`E}{Py7L@67L}TItlJ*uM*)6$@G^M{K7`?}~Dt23KNa=M@*Q7Q)0_4uD zk=Smx1_ZpXwmVwOB-BV`H@q7TLgIsC=vn4&d^Z|=g}6;zDm@pT*j>+kzwr?n(fT3A zxCmb|o?#rr$vRGBT*s+8r6rSd`~e{z-SYW*%09zPtx$dz;{gUeScwtX!2&bJhZy9O zA7>4{SL4gE%3h%_Wy)R2Jl39TNvU!=sAr8aO;1>oQPNeue3fms>Ov&k$)<8mX>S;j zgt$$Mb~MRIcWbBFec)NORF=pXqEPPC(urPGi0DKY@Gi3&*fx_5u-T9!iLhEpPIx`# zuo|X(BOZ||$#^ANip%^N_=Gi4h=&W!qLxo#O%lrCs!(bu{1NfDrwGH&1^BFvax+-* zh9sO(`I@2Y5?{KQ@zIuB<%?PFi9(@Y18jsw=YFK(T`X>IQP0(GD zms{STD#S0VmBK8RC7H#&f}K-{H8N1ktMV4n6+R96r3T5jWR^Qp2JR~}cRyEEVc3w( z%T>9V!FH01%Dj#5+znvopHWREH=$w{6-|7t9clN9RN zqCTb0=pPgPMs4p58z-Pj+ZEa!Mr3j>o>8R?rP3|kQU7>a+mkccrTij6hsK6DWoU62 zAbbm2%-(5_J`KEn3DWev0DXX@@lp#@Wc(@O0+!f**;BiZWy^X@AHl%*kqg+*pO5@t Ox&5Hu8H7&Y-@v~?UeDP8 literal 0 HcmV?d00001 diff --git a/metadata-ingestion/tests/integration/dremio/test_dremio.py b/metadata-ingestion/tests/integration/dremio/test_dremio.py new file mode 100644 index 0000000000000..3aa83ea0e7349 --- /dev/null +++ b/metadata-ingestion/tests/integration/dremio/test_dremio.py @@ -0,0 +1,345 @@ +import json +import os +import random +import 
subprocess +import time + +import boto3 +import pytest +import requests +from freezegun import freeze_time + +from tests.test_helpers import mce_helpers +from tests.test_helpers.click_helpers import run_datahub_cmd +from tests.test_helpers.docker_helpers import wait_for_port + +FROZEN_TIME = "2023-10-15 07:00:00" +MINIO_PORT = 9000 +# Dremio server credentials +DREMIO_HOST = "http://localhost:9047" +DREMIO_USERNAME = "admin" +DREMIO_PASSWORD = "2310Admin1234!@" # Set your Dremio admin password +MINIO_S3_ENDPOINT = "minio:9000" +AWS_ACCESS_KEY = "miniouser" +AWS_SECRET_KEY = "miniopassword" +AWS_ROOT_PATH = "/warehouse" + + +def is_minio_up(container_name: str) -> bool: + """A cheap way to figure out if postgres is responsive on a container""" + + cmd = f"docker logs {container_name} 2>&1 | grep '1 Online'" + ret = subprocess.run( + cmd, + shell=True, + ) + return ret.returncode == 0 + + +def create_spaces_and_folders(dremio_token): + """ + Create spaces and folders in Dremio + """ + url = f"{DREMIO_HOST}/api/v3/catalog" + headers = { + "Authorization": f"_dremio{dremio_token}", + "Content-Type": "application/json", + } + + # Create Space + payload = {"entityType": "space", "name": "my_space"} + response = requests.post(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to create space: {response.text}" + + # Create Folder inside Space + json_data = {"entityType": "folder", "path": ["my_space", "my_folder"]} + response = requests.post(url, headers=headers, data=json.dumps(json_data)) + assert response.status_code == 200, f"Failed to create folder: {response.text}" + + +def create_sample_source(dremio_token): + url = f"{DREMIO_HOST}/apiv2/source/Samples" + headers = { + "Authorization": f"_dremio{dremio_token}", + "Content-Type": "application/json", + } + + payload = { + "config": { + "externalBucketList": ["samples.dremio.com"], + "credentialType": "NONE", + "secure": False, + "propertyList": [], + }, + "name": "Samples", 
+ "accelerationRefreshPeriod": 3600000, + "accelerationGracePeriod": 10800000, + "accelerationNeverRefresh": True, + "accelerationNeverExpire": True, + "accelerationActivePolicyType": "PERIOD", + "accelerationRefreshSchedule": "0 0 8 * * *", + "accelerationRefreshOnDataChanges": False, + "type": "S3", + } + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + +def create_s3_source(dremio_token): + url = f"{DREMIO_HOST}/apiv2/source/s3" + headers = { + "Authorization": f"_dremio{dremio_token}", + "Content-Type": "application/json", + } + + payload = { + "name": "s3", + "config": { + "credentialType": "ACCESS_KEY", + "accessKey": AWS_ACCESS_KEY, + "accessSecret": AWS_SECRET_KEY, + "secure": False, + "externalBucketList": ["warehouse"], + "enableAsync": True, + "enableFileStatusCheck": True, + "rootPath": "/", + "defaultCtasFormat": "ICEBERG", + "propertyList": [ + {"name": "fs.s3a.access.key", "value": AWS_ACCESS_KEY}, + {"name": "fs.s3a.secret.key", "value": AWS_SECRET_KEY}, + { + "name": "fs.s3a.aws.credentials.provider", + "value": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider", + }, + {"name": "fs.s3a.endpoint", "value": MINIO_S3_ENDPOINT}, + {"name": "fs.s3a.path.style.access", "value": "True"}, + {"name": "dremio.s3.compat", "value": "True"}, + {"name": "fs.s3a.connection.ssl.enabled", "value": "False"}, + ], + "compatibilityMode": True, + "whitelistedBuckets": [], + "isCachingEnabled": True, + "maxCacheSpacePct": 100, + }, + "accelerationRefreshPeriod": 3600000, + "accelerationGracePeriod": 10800000, + "accelerationActivePolicyType": "PERIOD", + "accelerationRefreshSchedule": "0 0 8 * * *", + "accelerationRefreshOnDataChanges": False, + "metadataPolicy": { + "deleteUnavailableDatasets": True, + "autoPromoteDatasets": False, + "namesRefreshMillis": 3600000, + "datasetDefinitionRefreshAfterMillis": 3600000, + "datasetDefinitionExpireAfterMillis": 
10800000, + "authTTLMillis": 86400000, + "updateMode": "PREFETCH_QUERIED", + }, + "type": "S3", + "accessControlList": {"userControls": [], "roleControls": []}, + } + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add s3 datasource: {response.text}" + + +def upload_dataset(dremio_token): + + headers = { + "Authorization": f"_dremio{dremio_token}", + "Content-Type": "application/json", + } + + url = f"{DREMIO_HOST}/apiv2/source/s3/file_format/warehouse/sample.parquet" + payload = {"ignoreOtherFileFormats": False, "type": "Parquet"} + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + url = f"{DREMIO_HOST}/apiv2/source/Samples/file_format/samples.dremio.com/NYC-weather.csv" + + payload = { + "fieldDelimiter": ",", + "quote": '"', + "comment": "#", + "lineDelimiter": "\r\n", + "escape": '"', + "extractHeader": False, + "trimHeader": True, + "skipFirstLine": False, + "type": "Text", + } + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + url = f"{DREMIO_HOST}/apiv2/source/Samples/file_format/samples.dremio.com/Dremio%20University/oracle-departments.xlsx" + + payload = {"extractHeader": True, "hasMergedCells": False, "type": "Excel"} + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + url = f"{DREMIO_HOST}/apiv2/source/Samples/file_format/samples.dremio.com/Dremio%20University/googleplaystore.csv" + + payload = { + "fieldDelimiter": ",", + "quote": '"', + "comment": "#", + "lineDelimiter": "\r\n", + "escape": '"', + "extractHeader": False, + "trimHeader": True, + "skipFirstLine": False, + "type": "Text", + } + + response = requests.put(url, headers=headers, 
data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + url = f"{DREMIO_HOST}/apiv2/source/Samples/file_format/samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet" + payload = {"ignoreOtherFileFormats": False, "type": "Parquet"} + + response = requests.put(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to add dataset: {response.text}" + + +def add_datasets_to_space(dremio_token): + headers = { + "Authorization": f"_dremio{dremio_token}", + "Content-Type": "application/json", + } + + base_url = "{}/apiv2/{}" + + sql_version_number = "".join([str(random.randint(0, 9)) for _ in range(16)]) + url = base_url.format( + DREMIO_HOST, + f"datasets/new_tmp_untitled_sql?newVersion={sql_version_number}&limit=0", + ) + payload = { + "context": [], + "sql": 'SELECT * FROM s3.warehouse."sample.parquet"', + "references": {}, + } + + response = requests.post(url, headers=headers, data=json.dumps(payload)) + assert response.status_code == 200, f"Failed to create view: {response.text}" + time.sleep(5) + url = f"{DREMIO_HOST}/apiv2/dataset/tmp.UNTITLED/version/{sql_version_number}/save?as=%22my_space%22.%22my_folder%22.raw" + response = requests.post(url, headers=headers) + assert response.status_code == 200, f"Failed to add view in folder: {response.text}" + + +def execute_sql_query(token, query): + url = f"{DREMIO_HOST}/api/v3/sql" + headers = {"Content-Type": "application/json", "Authorization": f"_dremio{token}"} + data = json.dumps({"sql": query}) + + response = requests.post(url, headers=headers, data=data) + + assert ( + response.status_code == 200 + ), f"Failed to execute SQL query: {response.status_code}, {response.text}" + + +@pytest.fixture(scope="module") +def dremio_setup(): + token = dremio_token() + create_sample_source(token) + create_s3_source(token) + create_spaces_and_folders(token) + upload_dataset(token) + 
add_datasets_to_space(token) + + +def dremio_token(): + """ + Get Dremio authentication token + """ + url = f"{DREMIO_HOST}/apiv2/login" + headers = {"Content-Type": "application/json"} + payload = {"userName": DREMIO_USERNAME, "password": DREMIO_PASSWORD} + + response = requests.post(url, headers=headers, data=json.dumps(payload)) + response.raise_for_status() # Raise exception if request failed + return response.json()["token"] + + +@pytest.fixture(scope="module") +def test_resources_dir(pytestconfig): + return pytestconfig.rootpath / "tests/integration/dremio" + + +@pytest.fixture(scope="module") +def mock_dremio_service(docker_compose_runner, pytestconfig, test_resources_dir): + # Spin up Dremio and MinIO (for mock S3) services using Docker Compose. + with docker_compose_runner( + test_resources_dir / "docker-compose.yml", "dremio" + ) as docker_services: + wait_for_port(docker_services, "dremio", 9047, timeout=120) + wait_for_port( + docker_services, + "minio", + MINIO_PORT, + timeout=120, + checker=lambda: is_minio_up("minio"), + ) + + # Ensure the admin and data setup scripts have the right permissions + subprocess.run( + ["chmod", "+x", f"{test_resources_dir}/setup_dremio_admin.sh"], check=True + ) + + # Run the setup_dremio_admin.sh script + admin_setup_cmd = f"{test_resources_dir}/setup_dremio_admin.sh" + subprocess.run(admin_setup_cmd, shell=True, check=True) + + yield docker_compose_runner + + +@pytest.fixture(scope="module", autouse=True) +def s3_bkt(mock_dremio_service): + s3 = boto3.resource( + "s3", + endpoint_url=f"http://localhost:{MINIO_PORT}", + aws_access_key_id="miniouser", + aws_secret_access_key="miniopassword", + ) + bkt = s3.Bucket("warehouse") + bkt.create() + return bkt + + +@pytest.fixture(scope="module", autouse=True) +def populate_minio(pytestconfig, s3_bkt): + test_resources_dir = pytestconfig.rootpath / "tests/integration/dremio/test_data/" + + for root, _dirs, files in os.walk(test_resources_dir): + for file in files: + full_path = 
os.path.join(root, file) + rel_path = os.path.relpath(full_path, test_resources_dir) + s3_bkt.upload_file(full_path, rel_path) + yield + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_dremio_ingest( + test_resources_dir, + dremio_setup, + pytestconfig, + tmp_path, +): + # Run the metadata ingestion pipeline. + config_file = (test_resources_dir / "dremio_to_file.yml").resolve() + run_datahub_cmd(["ingest", "-c", f"{config_file}"], tmp_path=tmp_path) + + # Verify the output. + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "dremio_mces.json", + golden_path=test_resources_dir / "dremio_mces_golden.json", + ignore_paths=[], + ) From e1ee817a7e454342e31323d1b51080fd5d6a7622 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Thu, 17 Oct 2024 23:21:56 +0530 Subject: [PATCH 49/50] fix: added minor changes + fix testcase --- .../ingestion/source/dremio/dremio_api.py | 10 +- .../source/dremio/dremio_entities.py | 5 +- .../source/dremio/dremio_sql_queries.py | 4 +- .../dremio/dremio_mces_golden.json | 2225 +++++++++++++++-- .../tests/integration/dremio/test_dremio.py | 93 +- 5 files changed, 2050 insertions(+), 287 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index da71900a72ce2..c1e5fa5b34efc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -300,6 +300,7 @@ def community_get_formatted_tables( dataset_list = [] column_dictionary: Dict[str, List[Dict]] = defaultdict(list) + ordinal_position = 0 for record in tables_and_columns: if not record.get("COLUMN_NAME"): continue @@ -311,13 +312,17 @@ def community_get_formatted_tables( column_dictionary[table_full_path].append( { "name": record["COLUMN_NAME"], - "ordinal_position": record["ORDINAL_POSITION"], + "ordinal_position": record.get( + 
"ORDINAL_POSITION", ordinal_position + ), "is_nullable": record["IS_NULLABLE"], "data_type": record["DATA_TYPE"], "column_size": record["COLUMN_SIZE"], } ) + ordinal_position += 1 + if record.get("TABLE_SCHEMA") not in schema_list: schema_list.append(record.get("TABLE_SCHEMA")) @@ -412,6 +417,7 @@ def get_pattern_condition( all_tables_and_columns = [] for schema in containers: + formatted_query = "" try: formatted_query = query_template.format( schema_pattern=schema_condition, @@ -428,7 +434,7 @@ def get_pattern_condition( ) except Exception as exc: logger.warning( - f"{schema.subclass} {schema.container_name} had no tables or views" + f"{schema.subclass} {schema.container_name} had no tables or views {formatted_query}" ) logger.debug(exc) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py index 895d6cbc0281c..3cb48b578324c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py @@ -224,13 +224,14 @@ def __init__( else: self.dataset_type = DremioDatasetType.TABLE + self.owner = dataset_details.get("OWNER") + self.owner_type = dataset_details.get("OWNER_TYPE") + if api_operations.edition in ( DremioEdition.ENTERPRISE, DremioEdition.CLOUD, ): self.created = dataset_details.get("CREATED", "") - self.owner = dataset_details.get("OWNER") - self.owner_type = dataset_details.get("OWNER_TYPE") self.format_type = dataset_details.get("FORMAT_TYPE") self.description = api_operations.get_description_for_resource( diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py index 0d46d0ad357b3..1db03b3c04af6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py @@ -13,11 +13,11 @@ class DremioSQLQueries: C.COLUMN_SIZE FROM INFORMATION_SCHEMA."TABLES" T - LEFT JOININFORMATION_SCHEMA.VIEWS V ON + LEFT JOIN INFORMATION_SCHEMA.VIEWS V ON V.TABLE_CATALOG = T.TABLE_CATALOG AND V.TABLE_SCHEMA = T.TABLE_SCHEMA AND V.TABLE_NAME = T.TABLE_NAME - INNER JOININFORMATION_SCHEMA.COLUMNS C ON + INNER JOIN INFORMATION_SCHEMA.COLUMNS C ON C.TABLE_CATALOG = T.TABLE_CATALOG AND C.TABLE_SCHEMA = T.TABLE_SCHEMA AND C.TABLE_NAME = T.TABLE_NAME diff --git a/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json index b6a03761fa4c5..3969ee4b68b71 100644 --- a/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json +++ b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json @@ -1,14 +1,14 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": {}, - "name": "my_space", - "qualifiedName": "my_space", + "name": "my_folder", + "qualifiedName": "my_space.my_folder", "description": "", "env": "PROD" } @@ -21,12 +21,17 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "path": [ + { + "id": "my_space", + "urn": "urn:li:container:19724f46e67c95babcd35b49cd200397" + } + ] } }, "systemMetadata": { @@ -37,16 +42,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "entityUrn": 
"urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": {}, - "name": "@admin", - "qualifiedName": "@admin", - "description": "# Wikis & Labels\n\n![Gnarly Catalog](https://d33wubrfki0l68.cloudfront.net/c1a54376c45a9276c080f3d10ed25ce61c17bcd2/2b946/img/home/open-source-for-everyone.svg)\n\nYou are reading the wiki for your home space! You can create and edit this information for any source, space, or folder.\n\nThis sidebar always shows the wiki for the current source, space or folder you are browsing.\n\nWhen browsing or previewing datasets, click on the `Open details panel` button to create a wiki or add labels to that dataset.\n\n**Tip:** You can hide the wiki by clicking on the sidebar icon on upper right hand side.", - "env": "PROD" + "container": "urn:li:container:5492d77b2954324ad07fff10d741e2d1" } }, "systemMetadata": { @@ -57,12 +58,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -73,13 +74,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Dremio Space" + "Dremio Folder" ] } }, @@ -91,12 +92,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": 
"urn:li:dataPlatform:dremio" + "removed": false } }, "systemMetadata": { @@ -107,14 +108,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Dremio Space" - ] + "customProperties": {}, + "name": "catalog_page", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page", + "description": "", + "env": "PROD" } }, "systemMetadata": { @@ -125,12 +128,25 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:3acf5b43ce20cbc5b60abbdb57bb2a1a" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:c370d9075a8f1b3a28dbc3603717d466" + }, + { + "id": "tpcds_sf1000", + "urn": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + } + ] } }, "systemMetadata": { @@ -141,12 +157,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + "container": "urn:li:container:201af44510e34d8245f5515ee71a751f" } }, "systemMetadata": { @@ -157,14 +173,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Dremio 
Folder" - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -175,12 +189,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Dremio Folder" + ] } }, "systemMetadata": { @@ -191,12 +207,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "removed": false } }, "systemMetadata": { @@ -207,14 +223,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": {}, - "name": "catalog_page", - "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page", + "name": "tpcds_sf1000", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000", "description": "", "env": "PROD" } @@ -227,7 +243,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -235,15 +251,11 @@ "path": [ { "id": "Samples", - "urn": "urn:li:container:3acf5b43ce20cbc5b60abbdb57bb2a1a" + "urn": "urn:li:container:ef36cbff27a47ed33a76d0232fc3295c" }, { "id": "samples.dremio.com", - "urn": "urn:li:container:c370d9075a8f1b3a28dbc3603717d466" - }, - { - "id": "tpcds_sf1000", - "urn": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + "urn": 
"urn:li:container:201af44510e34d8245f5515ee71a751f" } ] } @@ -256,16 +268,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": {}, - "name": "1ab266d5-18eb-4780-711d-0fa337fa6c00", - "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00", - "description": "", - "env": "PROD" + "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" } }, "systemMetadata": { @@ -276,29 +284,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "Samples", - "urn": "urn:li:container:be5a17b95b6903826193185a545fdb38" - }, - { - "id": "samples.dremio.com", - "urn": "urn:li:container:cd673c4bfa428a6c3e4157eed8f4efdd" - }, - { - "id": "tpcds_sf1000", - "urn": "urn:li:container:ad2959fe35f5f1baa446ebb3a9a90d27" - }, - { - "id": "catalog_page", - "urn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d" - } - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -309,12 +300,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:201af44510e34d8245f5515ee71a751f" + "typeNames": [ + "Dremio Folder" + ] } }, "systemMetadata": { @@ -325,12 +318,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + 
"entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "removed": false } }, "systemMetadata": { @@ -386,16 +379,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": {}, - "name": "s3", - "qualifiedName": "s3", - "description": "", - "env": "PROD" + "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" } }, "systemMetadata": { @@ -406,14 +395,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Dremio Folder" - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -426,10 +413,12 @@ "entityType": "container", "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + "typeNames": [ + "Dremio Folder" + ] } }, "systemMetadata": { @@ -442,10 +431,10 @@ "entityType": "container", "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "removed": false } }, "systemMetadata": { @@ -456,14 +445,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "entityUrn": 
"urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": {}, - "name": "samples.dremio.com", - "qualifiedName": "Samples.samples.dremio.com", + "name": "Samples", + "qualifiedName": "Samples", "description": "", "env": "PROD" } @@ -476,17 +465,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "Samples", - "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" - } - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -497,16 +481,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "name": "my_folder", - "qualifiedName": "my_space.my_folder", - "description": "", - "env": "PROD" + "typeNames": [ + "Dremio Source" + ] } }, "systemMetadata": { @@ -517,17 +499,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "my_space", - "urn": "urn:li:container:19724f46e67c95babcd35b49cd200397" - } - ] + "removed": false } }, "systemMetadata": { @@ -538,12 +515,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:0c593828953ba6a54f29a97639e300e5", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", "changeType": "UPSERT", - "aspectName": "status", + 
"aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": {}, + "name": "s3", + "qualifiedName": "s3", + "description": "", + "env": "PROD" } }, "systemMetadata": { @@ -554,14 +535,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Dremio Folder" - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -572,14 +551,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:5492d77b2954324ad07fff10d741e2d1" - } - }, + "typeNames": [ + "Dremio Source" + ] + } + }, "systemMetadata": { "lastObserved": 1697353200000, "runId": "dremio-2023_10_15-07_00_00", @@ -588,12 +569,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "removed": false } }, "systemMetadata": { @@ -606,10 +587,14 @@ "entityType": "container", "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" + "customProperties": {}, + "name": "samples.dremio.com", + "qualifiedName": "Samples.samples.dremio.com", + "description": "", + "env": "PROD" } }, "systemMetadata": { @@ -620,13 +605,16 
@@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Dremio Folder" + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } ] } }, @@ -638,12 +626,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:19724f46e67c95babcd35b49cd200397", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" } }, "systemMetadata": { @@ -670,12 +658,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:973eabd34940a8012ff93f357510bc1d", + "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Dremio Folder" + ] } }, "systemMetadata": { @@ -688,12 +678,10 @@ "entityType": "container", "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Dremio Folder" - ] + "removed": false } }, "systemMetadata": { @@ -704,12 +692,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": {}, + "name": "@admin", + "qualifiedName": "@admin", + "description": "# Wikis & Labels\n\n![Gnarly 
Catalog](https://d33wubrfki0l68.cloudfront.net/c1a54376c45a9276c080f3d10ed25ce61c17bcd2/2b946/img/home/open-source-for-everyone.svg)\n\nYou are reading the wiki for your home space! You can create and edit this information for any source, space, or folder.\n\nThis sidebar always shows the wiki for the current source, space or folder you are browsing.\n\nWhen browsing or previewing datasets, click on the `Open details panel` button to create a wiki or add labels to that dataset.\n\n**Tip:** You can hide the wiki by clicking on the sidebar icon on upper right hand side.", + "env": "PROD" } }, "systemMetadata": { @@ -720,14 +712,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Dremio Source" - ] + "platform": "urn:li:dataPlatform:dremio" } }, "systemMetadata": { @@ -738,12 +728,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "typeNames": [ + "Dremio Space" + ] } }, "systemMetadata": { @@ -754,7 +746,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d", + "entityUrn": "urn:li:container:3ace723c787c49d7966071cc89d06c9b", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -770,14 +762,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": {}, - "name": 
"Samples", - "qualifiedName": "Samples", + "name": "my_space", + "qualifiedName": "my_space", "description": "", "env": "PROD" } @@ -790,7 +782,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -806,12 +798,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:63a316133b08a091e919dc8c7a828a4d" + "typeNames": [ + "Dremio Space" + ] } }, "systemMetadata": { @@ -822,7 +816,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "entityUrn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -838,14 +832,16 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Dremio Source" - ] + "customProperties": {}, + "name": "warehouse", + "qualifiedName": "s3.warehouse", + "description": "", + "env": "PROD" } }, "systemMetadata": { @@ -858,11 +854,14 @@ "entityType": "container", "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Dremio Folder" + "path": [ + { + "id": "s3", + "urn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a" + } ] } }, @@ -876,10 +875,10 @@ "entityType": "container", "entityUrn": 
"urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:63a316133b08a091e919dc8c7a828a4d" } }, "systemMetadata": { @@ -908,12 +907,46 @@ "entityType": "container", "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dremio Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": {}, - "name": "warehouse", - "qualifiedName": "s3.warehouse", + "name": "1ab266d5-18eb-4780-711d-0fa337fa6c00", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00", "description": "", "env": "PROD" } @@ -926,15 +959,27 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "s3", - "urn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a" + "id": "Samples", + "urn": "urn:li:container:be5a17b95b6903826193185a545fdb38" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:cd673c4bfa428a6c3e4157eed8f4efdd" + }, + { 
+ "id": "tpcds_sf1000", + "urn": "urn:li:container:ad2959fe35f5f1baa446ebb3a9a90d27" + }, + { + "id": "catalog_page", + "urn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d" } ] } @@ -947,12 +992,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + "container": "urn:li:container:0c593828953ba6a54f29a97639e300e5" } }, "systemMetadata": { @@ -963,7 +1008,23 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -981,7 +1042,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityUrn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -996,13 +1057,21 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:dremio" + "customProperties": {}, + "externalUrl": 
"http://localhost:9047/api/v3/source/\"Samples\"/\"samples.dremio.com\".\"NYC-weather.csv\"", + "name": "NYC-weather.csv", + "qualifiedName": "Samples.samples.dremio.com.NYC-weather.csv", + "description": "", + "created": { + "time": 0 + }, + "tags": [] } }, "systemMetadata": { @@ -1012,17 +1081,15 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "name": "tpcds_sf1000", - "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000", - "description": "", - "env": "PROD" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1032,8 +1099,24 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:201af44510e34d8245f5515ee71a751f", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1041,11 +1124,1725 @@ "path": [ { "id": "Samples", - "urn": "urn:li:container:ef36cbff27a47ed33a76d0232fc3295c" + "urn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" }, { "id": "samples.dremio.com", - "urn": "urn:li:container:201af44510e34d8245f5515ee71a751f" + "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "Samples.samples.dremio.com.NYC-weather.csv", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "F", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "G", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "H", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "I", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character 
varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "A", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "B", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "C", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "D", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "E", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/NYC-weather.csv,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "http://localhost:9047/api/v3/source/\"Samples\"/\"samples.dremio.com\".\"Dremio University\".\"googleplaystore.csv\"", + "name": "googleplaystore.csv", + "qualifiedName": "Samples.samples.dremio.com.Dremio University.googleplaystore.csv", + "description": "", + "created": { + "time": 0 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + }, + { + "id": "Dremio University", + "urn": "urn:li:container:973eabd34940a8012ff93f357510bc1d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:973eabd34940a8012ff93f357510bc1d" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "Samples.samples.dremio.com.Dremio University.googleplaystore.csv", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "A", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, 
+ { + "fieldPath": "B", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "C", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "D", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "E", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "F", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "G", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "H", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "I", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "J", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "K", + "nullable": true, + "type": { + "type": { + 
"com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "L", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "M", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/Dremio University/googleplaystore.csv,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": 
"datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "http://localhost:9047/api/v3/source/\"Samples\"/\"samples.dremio.com\".\"Dremio University\".\"oracle-departments.xlsx\"", + "name": "oracle-departments.xlsx", + "qualifiedName": "Samples.samples.dremio.com.Dremio University.oracle-departments.xlsx", + "description": "", + "created": { + "time": 0 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" + }, + { + "id": "samples.dremio.com", + "urn": "urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + }, + { + "id": "Dremio University", + "urn": "urn:li:container:973eabd34940a8012ff93f357510bc1d" 
+ } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:973eabd34940a8012ff93f357510bc1d" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "Samples.samples.dremio.com.Dremio University.oracle-departments.xlsx", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "DEPARTMENT_NAME", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "MANAGER_ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "double(53)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "DEPARTMENT_ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "double(53)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "LOCATION_ID", + "nullable": true, + 
"type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "double(53)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "http://localhost:9047/api/v3/space/\"my_space\"/\"my_folder\".\"raw\"", + "name": "raw", + "qualifiedName": "my_space.my_folder.raw", + "description": "", + "created": { + "time": 0 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "my_space", + "urn": "urn:li:container:5492d77b2954324ad07fff10d741e2d1" + }, + { + "id": "my_folder", + "urn": "urn:li:container:19724f46e67c95babcd35b49cd200397" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:19724f46e67c95babcd35b49cd200397" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + 
"schemaName": "my_space.my_folder.raw", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "age", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "salary", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + 
"json": { + "customProperties": {}, + "externalUrl": "http://localhost:9047/api/v3/source/\"Samples\"/\"samples.dremio.com\".\"tpcds_sf1000\".\"catalog_page\".\"1ab266d5-18eb-4780-711d-0fa337fa6c00\".\"0_0_0.parquet\"", + "name": "0_0_0.parquet", + "qualifiedName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet", + "description": "", + "created": { + "time": 0 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Samples", + "urn": "urn:li:container:e8cccb9f7a06aeafad68f76e30c62f68" + }, + { + "id": "samples.dremio.com", + "urn": 
"urn:li:container:e247722e4a5ae3879de75a2a478ecbbe" + }, + { + "id": "tpcds_sf1000", + "urn": "urn:li:container:201af44510e34d8245f5515ee71a751f" + }, + { + "id": "catalog_page", + "urn": "urn:li:container:0c593828953ba6a54f29a97639e300e5" + }, + { + "id": "1ab266d5-18eb-4780-711d-0fa337fa6c00", + "urn": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:1da1928d2528f84a5e928fca4c3bc75d" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "Samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "cp_start_date_sk", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"cp_catalog_page_sk", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_catalog_page_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_end_date_sk", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_department", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_catalog_number", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_catalog_page_number", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_description", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cp_type", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "http://localhost:9047/api/v3/source/\"s3\"/\"warehouse\".\"sample.parquet\"", + "name": "sample.parquet", + "qualifiedName": "s3.warehouse.sample.parquet", + "description": "", + "created": { + "time": 0 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + 
"Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "s3", + "urn": "urn:li:container:63a316133b08a091e919dc8c7a828a4d" + }, + { + "id": "warehouse", + "urn": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b6e7d0c364ccc53ccb02b438999fda9a" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "s3.warehouse.sample.parquet", + "platform": "urn:li:dataPlatform:dremio", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + 
"actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "character varying(65536)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "age", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "salary", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "bigint(64)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/warehouse/sample.parquet,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, 
+ "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dremio,s3.warehouse.sample.parquet,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.my_space.my_folder.raw%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.my_space.my_folder.raw%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "SELECT\n *\nFROM s3.warehouse.\"sample.parquet\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.my_space.my_folder.raw%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,s3.warehouse.sample.parquet,PROD)" + }, + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.raw,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.my_space.my_folder.raw%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse.sample.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/warehouse/sample.parquet,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/Dremio University/googleplaystore.csv,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + 
"runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/NYC-weather.csv,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:s3,/samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD)", + "type": "COPY" } ] } diff --git a/metadata-ingestion/tests/integration/dremio/test_dremio.py b/metadata-ingestion/tests/integration/dremio/test_dremio.py index 3aa83ea0e7349..2ebfde139b9af 100644 --- a/metadata-ingestion/tests/integration/dremio/test_dremio.py +++ b/metadata-ingestion/tests/integration/dremio/test_dremio.py @@ -1,8 +1,6 @@ import json import os -import random import subprocess -import time import boto3 import pytest @@ -36,15 +34,11 @@ def is_minio_up(container_name: str) -> bool: return ret.returncode == 0 -def create_spaces_and_folders(dremio_token): +def create_spaces_and_folders(headers): """ Create spaces and folders in Dremio """ url = f"{DREMIO_HOST}/api/v3/catalog" - headers = { - "Authorization": f"_dremio{dremio_token}", - "Content-Type": "application/json", - } # Create Space payload = {"entityType": "space", "name": "my_space"} @@ -57,12 +51,8 @@ def create_spaces_and_folders(dremio_token): assert response.status_code == 200, f"Failed to create folder: {response.text}" -def create_sample_source(dremio_token): +def create_sample_source(headers): url = f"{DREMIO_HOST}/apiv2/source/Samples" - headers = { - "Authorization": f"_dremio{dremio_token}", - "Content-Type": "application/json", - } payload = { "config": { @@ -86,12 +76,8 @@ def create_sample_source(dremio_token): assert response.status_code == 200, f"Failed to add dataset: {response.text}" -def create_s3_source(dremio_token): +def create_s3_source(headers): url = f"{DREMIO_HOST}/apiv2/source/s3" - headers = { - "Authorization": f"_dremio{dremio_token}", - "Content-Type": "application/json", - } payload = { "name": "s3", @@ -144,12 +130,7 @@ def create_s3_source(dremio_token): assert response.status_code == 200, f"Failed to add s3 datasource: {response.text}" -def upload_dataset(dremio_token): - - headers = { - "Authorization": 
f"_dremio{dremio_token}", - "Content-Type": "application/json", - } +def upload_dataset(headers): url = f"{DREMIO_HOST}/apiv2/source/s3/file_format/warehouse/sample.parquet" payload = {"ignoreOtherFileFormats": False, "type": "Parquet"} @@ -205,56 +186,19 @@ def upload_dataset(dremio_token): assert response.status_code == 200, f"Failed to add dataset: {response.text}" -def add_datasets_to_space(dremio_token): - headers = { - "Authorization": f"_dremio{dremio_token}", - "Content-Type": "application/json", - } - - base_url = "{}/apiv2/{}" - - sql_version_number = "".join([str(random.randint(0, 9)) for _ in range(16)]) - url = base_url.format( - DREMIO_HOST, - f"datasets/new_tmp_untitled_sql?newVersion={sql_version_number}&limit=0", - ) +def create_view(headers): + url = f"{DREMIO_HOST}/api/v3/catalog" payload = { - "context": [], + "entityType": "dataset", + "type": "VIRTUAL_DATASET", + "path": ["my_space", "my_folder", "raw"], "sql": 'SELECT * FROM s3.warehouse."sample.parquet"', - "references": {}, } - response = requests.post(url, headers=headers, data=json.dumps(payload)) assert response.status_code == 200, f"Failed to create view: {response.text}" - time.sleep(5) - url = f"{DREMIO_HOST}/apiv2/dataset/tmp.UNTITLED/version/{sql_version_number}/save?as=%22my_space%22.%22my_folder%22.raw" - response = requests.post(url, headers=headers) - assert response.status_code == 200, f"Failed to add view in folder: {response.text}" - - -def execute_sql_query(token, query): - url = f"{DREMIO_HOST}/api/v3/sql" - headers = {"Content-Type": "application/json", "Authorization": f"_dremio{token}"} - data = json.dumps({"sql": query}) - - response = requests.post(url, headers=headers, data=data) - - assert ( - response.status_code == 200 - ), f"Failed to execute SQL query: {response.status_code}, {response.text}" - - -@pytest.fixture(scope="module") -def dremio_setup(): - token = dremio_token() - create_sample_source(token) - create_s3_source(token) - create_spaces_and_folders(token) 
- upload_dataset(token) - add_datasets_to_space(token) -def dremio_token(): +def dremio_header(): """ Get Dremio authentication token """ @@ -264,7 +208,22 @@ def dremio_token(): response = requests.post(url, headers=headers, data=json.dumps(payload)) response.raise_for_status() # Raise exception if request failed - return response.json()["token"] + + headers = { + "Content-Type": "application/json", + "Authorization": f"_dremio{response.json()['token']}", + } + return headers + + +@pytest.fixture(scope="module") +def dremio_setup(): + headers = dremio_header() + create_sample_source(headers) + create_s3_source(headers) + create_spaces_and_folders(headers) + upload_dataset(headers) + create_view(headers) @pytest.fixture(scope="module") From b23e0030e478f29628a9b34cb8d3097aec0d6628 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Mon, 21 Oct 2024 13:41:22 +0530 Subject: [PATCH 50/50] fix: PR Comments --- .../docs/sources/dremio/README.md | 23 +++ .../docs/sources/dremio/dremio_pre.md | 45 +++++ .../ingestion/source/dremio/dremio_api.py | 36 ---- .../ingestion/source/dremio/dremio_aspects.py | 3 +- .../ingestion/source/dremio/dremio_config.py | 160 ++++++++-------- .../dremio/dremio_datahub_source_mapping.py | 10 +- .../ingestion/source/dremio/dremio_source.py | 175 ++++++++---------- 7 files changed, 231 insertions(+), 221 deletions(-) create mode 100644 metadata-ingestion/docs/sources/dremio/README.md create mode 100644 metadata-ingestion/docs/sources/dremio/dremio_pre.md diff --git a/metadata-ingestion/docs/sources/dremio/README.md b/metadata-ingestion/docs/sources/dremio/README.md new file mode 100644 index 0000000000000..e36e928e1eddc --- /dev/null +++ b/metadata-ingestion/docs/sources/dremio/README.md @@ -0,0 +1,23 @@ +### Concept Mapping + +- **Dremio Datasets**: Mapped to DataHub’s `Dataset` entity. + - A dataset can be physical or virtual. 
+- **Lineage**: Mapped to DataHub’s `UpstreamLineage` aspect, representing the flow of data between datasets and columns. +- **Containers**: Spaces, folders, and sources in Dremio are mapped to DataHub’s `Container` aspect, organizing datasets logically. + +Here's a table for **Concept Mapping** between Dremio and DataHub to provide a clear overview of how entities and concepts in Dremio are mapped to corresponding entities in DataHub: + +| **Dremio Concept** | **DataHub Entity/Aspect** | **Description** | | +| --- | --- | --- | --- | +| **Physical Dataset** | `Dataset` | A dataset directly queried from an external source without modifications. | | +| **Virtual Dataset** | `Dataset` | A dataset built from SQL-based transformations on other datasets. | | +| **Spaces** | `Container` | Top-level organizational unit in Dremio, used to group datasets. Mapped to DataHub’s `Container` aspect. | | +| **Folders** | `Container` | Substructure inside spaces, used for organizing datasets. Mapped as a `Container` in DataHub. | | +| **Sources** | `Container` | External data sources connected to Dremio (e.g., S3, databases). Represented as a `Container` in DataHub. | | +| **Column Lineage** | `ColumnLineage` | Lineage between columns in datasets, showing how individual columns are transformed across datasets. | | +| **Dataset Lineage** | `UpstreamLineage` | Lineage between datasets, tracking the flow and transformations between different datasets. | | +| **Ownership (Dataset)** | `Ownership` | Ownership information for datasets, representing the technical owner in DataHub’s `Ownership` aspect. | | +| **Glossary Terms** | `GlossaryTerms` | Business terms associated with datasets, providing context. Mapped as `GlossaryTerms` in DataHub. | | +| **Schema Metadata** | `SchemaMetadata` | Schema details (columns, data types) for datasets. Mapped to DataHub’s `SchemaMetadata` aspect. 
| | +| **SQL Transformations** | `Dataset` (with lineage) | SQL queries in Dremio that transform datasets. Represented as `Dataset` in DataHub, with lineage showing dependency. | | +| **Queries** | `Query` (if mapped) | Historical SQL queries executed on Dremio datasets. These can be tracked for audit purposes in DataHub. | | \ No newline at end of file diff --git a/metadata-ingestion/docs/sources/dremio/dremio_pre.md b/metadata-ingestion/docs/sources/dremio/dremio_pre.md new file mode 100644 index 0000000000000..eb94b17ae48b3 --- /dev/null +++ b/metadata-ingestion/docs/sources/dremio/dremio_pre.md @@ -0,0 +1,45 @@ +### Setup + +This integration pulls metadata directly from the Dremio APIs. + +You'll need to have a Dremio instance up and running with access to the necessary datasets, and API access should be enabled with a valid token. + +**Dremio instance can be one of the following**: + + - Dremio Cloud (Fully managed cloud SaaS) + - Standard + - Enterprise + - Dremio Software (self-managed on own infrastructure / on-premise) + - Community (oss) + - Enterprise + +The API token should have the necessary permissions to **read metadata** and **retrieve lineage**. + +#### Steps to Get the Required Information + +1. **Generate an API Token**: + + - Log in to your Dremio instance. + - Navigate to your user profile in the top-right corner. + - Select **Generate API Token** to create an API token for programmatic access. + - Ensure that the API token has sufficient permissions to access datasets, spaces, sources, and lineage. + +2. **Identify the API Endpoint**: + + - The Dremio API endpoint typically follows this format: + `https://<your-dremio-host>/api/v3/` + - This endpoint is used to query metadata and lineage information. + +3. **Get the Space, Folder, and Dataset Details**: + - To identify specific datasets or containers (spaces, folders, sources), navigate to the Dremio web interface.
+ - Explore the **Spaces** and **Sources** sections to identify the datasets you need to retrieve metadata for. +4. **Permissions**: + - The token should have **read-only** or **admin** permissions that allow it to: + - View all datasets (physical and virtual). + - Access all spaces, folders, and sources. + - Retrieve dataset and column-level lineage information. +5. **Verify External Data Source Permissions**: + - If Dremio is connected to external data sources (e.g., AWS S3, relational databases), ensure that Dremio has access to the credentials required for querying those sources. + + +Ensure your API token has the correct permissions to interact with the Dremio metadata. diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index c1e5fa5b34efc..e59624d4ba2fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -613,42 +613,6 @@ def get_description_for_resource(self, resource_id: str) -> Optional[str]: ) return None - def get_source_type( - self, - dremio_source_type: str, - datahub_source_type: Optional[str], - ) -> Optional[str]: - """ - Get Dremio wiki entry for a given resource_id. - """ - - lookup_datahub_source_type = ( - self.dremio_to_datahub_source_mapper.get_datahub_source_type( - dremio_source_type=dremio_source_type, - ) - ) - - if lookup_datahub_source_type: - return lookup_datahub_source_type - - self.dremio_to_datahub_source_mapper.add_mapping( - dremio_source_type=dremio_source_type, - datahub_source_type=datahub_source_type, - ) - return datahub_source_type - - def get_source_category( - self, - dremio_source_type: str, - ) -> Optional[str]: - """ - Get Dremio wiki entry for a given resource_id. 
- """ - - return self.dremio_to_datahub_source_mapper.get_category( - source_type=dremio_source_type, - ) - def get_containers_for_location( self, resource_id: str, path: List[str] ) -> List[Dict[str, str]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py index 77d136d44ec68..d49f5b803b396 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py @@ -41,7 +41,6 @@ DatasetPropertiesClass, DateTypeClass, DomainsClass, - FabricTypeClass, GlossaryTermAssociationClass, GlossaryTermInfoClass, GlossaryTermsClass, @@ -147,9 +146,9 @@ def __init__( platform: str, profiler: DremioProfiler, base_url: str, + env: str, domain: Optional[str] = None, platform_instance: Optional[str] = None, - env: Optional[Union[FabricTypeClass, str]] = FabricTypeClass.PROD, profiling_enabled: bool = False, ): self.platform = platform diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py index 3ac18f6e1113e..cacb498adbcf8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py @@ -5,67 +5,18 @@ from pydantic import Field, validator from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.configuration.source_common import ( + EnvConfigMixin, + PlatformInstanceConfigMixin, +) from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionConfigBase, ) -from datahub.metadata.schema_classes import FabricTypeClass - - -class ProfileConfig(GEProfilingConfig): - partition_profiling_enabled: bool = Field( - default=False, - description="Partition profiling 
disabled for Dremio.", - ) - include_field_median_value: bool = Field( - default=False, - description="Median causes a number of issues in Dremio.", - ) - query_timeout: int = Field( - default=300, description="Time before cancelling Dremio profiling query" - ) - - row_count: bool = True - column_count: bool = True - sample_values: bool = True - - -class DremioSourceMapping(ConfigModel): - platform: Optional[str] = Field( - default=None, - description="Source connection made by Dremio (e.g. S3, Snowflake)", - ) - platform_name: Optional[str] = Field( - default=None, - description="Alias of platform in Dremio connection", - ) - platform_instance: Optional[str] = Field( - default=None, - description="Platform instance of source connection in Datahub", - ) - dremio_source_type: Optional[str] = Field( - default=None, - description="Source connection made by Dremio (e.g. S3, Snowflake)", - ) - env: Optional[str] = Field( - default=FabricTypeClass.PROD, - description="ENV in Datahub of source connection made by Dremio (e.g. 
PROD)", - ) - root_path: Optional[str] = Field( - default=None, - description="Root path of source - Extracted from Dremio API", - hidden_from_docs=True, - ) - database_name: Optional[str] = Field( - default=None, - description="Database of source - Extracted from Dremio API", - hidden_from_docs=True, - ) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig -class DremioSourceConfig(ConfigModel, StatefulIngestionConfigBase): - - # Dremio Connection Details +class DremioConnectionConfig(ConfigModel): hostname: Optional[str] = Field( default=None, description="Hostname or IP Address of the Dremio server", @@ -106,33 +57,94 @@ class DremioSourceConfig(ConfigModel, StatefulIngestionConfigBase): description="Path to SSL certificates", ) - # Dremio Cloud specific configs is_dremio_cloud: Optional[bool] = Field( default=False, description="Whether this is a Dremio Cloud instance", ) + dremio_cloud_region: Literal["US", "EMEA"] = Field( default="US", description="Dremio Cloud region ('US' or 'EMEA')", ) - # DataHub Environment details - env: str = Field( - default=FabricTypeClass.PROD, - description="Environment to use in namespace when constructing URNs.", + @validator("authentication_method") + def validate_auth_method(cls, value): + allowed_methods = ["password", "PAT"] + if value not in allowed_methods: + raise ValueError( + f"authentication_method must be one of {allowed_methods}", + ) + return value + + @validator("password") + def validate_password(cls, value, values): + if values.get("authentication_method") == "PAT" and not value: + raise ValueError( + "Password (Personal Access Token) is required when using PAT authentication", + ) + return value + + +class ProfileConfig(GEProfilingConfig): + partition_profiling_enabled: bool = Field( + default=False, + description="Partition profiling disabled for Dremio.", ) + include_field_median_value: bool = Field( + default=False, + description="Median causes a number of issues in Dremio.", + ) + 
query_timeout: int = Field( + default=300, description="Time before cancelling Dremio profiling query" + ) + + row_count: bool = True + column_count: bool = True + sample_values: bool = True + +class DremioSourceMapping(EnvConfigMixin, ConfigModel): + platform: Optional[str] = Field( + default=None, + description="Source connection made by Dremio (e.g. S3, Snowflake)", + ) + platform_name: Optional[str] = Field( + default=None, + description="Alias of platform in Dremio connection", + ) platform_instance: Optional[str] = Field( default=None, - description="The instance of the platform that all assets produced by this recipe belong to. " - "This should be unique within the platform. " - "See https://datahubproject.io/docs/platform-instances/ for more details.", + description="Platform instance of source connection in Datahub", + ) + dremio_source_type: Optional[str] = Field( + default=None, + description="Source connection made by Dremio (e.g. S3, Snowflake)", ) + root_path: Optional[str] = Field( + default=None, + description="Root path of source - Extracted from Dremio API", + hidden_from_docs=True, + ) + database_name: Optional[str] = Field( + default=None, + description="Database of source - Extracted from Dremio API", + hidden_from_docs=True, + ) + + +class DremioSourceConfig( + DremioConnectionConfig, + StatefulIngestionConfigBase, + EnvConfigMixin, + PlatformInstanceConfigMixin, +): + domain: Optional[str] = Field( default=None, description="Domain for all source objects.", ) + source_mappings: Optional[List[DremioSourceMapping]] = Field( default=None, description="Mappings from Dremio sources to DataHub platforms and datasets.", @@ -148,6 +160,11 @@ class DremioSourceConfig(ConfigModel, StatefulIngestionConfigBase): description="Regex patterns for schemas to filter", ) + usage: BaseUsageConfig = Field( + description="The usage config to use when generating usage statistics", + default=BaseUsageConfig(), + ) + # Profiling profile_pattern: AllowDenyPattern = 
Field( default=AllowDenyPattern.allow_all(), @@ -178,20 +195,3 @@ class DremioSourceConfig(ConfigModel, StatefulIngestionConfigBase): default=True, description="Whether to include copy lineage", ) - - @validator("authentication_method") - def validate_auth_method(cls, value): - allowed_methods = ["password", "PAT"] - if value not in allowed_methods: - raise ValueError( - f"authentication_method must be one of {allowed_methods}", - ) - return value - - @validator("password") - def validate_password(cls, value, values): - if values.get("authentication_method") == "PAT" and not value: - raise ValueError( - "Password (Personal Access Token) is required when using PAT authentication", - ) - return value diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py index 70a61d40bda3a..3da091d776b91 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py @@ -3,6 +3,9 @@ """ +from typing import Optional + + class DremioToDataHubSourceTypeMapping: """ Dremio source type to the Datahub source type mapping. @@ -84,7 +87,12 @@ def get_category(self, source_type): return "file_object_storage" return "unknown" - def add_mapping(self, dremio_source_type, datahub_source_type, category=None): + def add_mapping( + self, + dremio_source_type: str, + datahub_source_type: str, + category: Optional[str] = None, + ) -> None: """ Add new source type if not in map (e.g. 
Dremio ARP) """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py index ddac5bf3a3e30..d2d76472113b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py @@ -20,7 +20,11 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import SourceCapability, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.dremio.dremio_api import ( DremioAPIOperations, @@ -46,7 +50,6 @@ DremioProfiler, ProfileConfig, ) -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, StaleEntityRemovalSourceReport, @@ -54,7 +57,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, ) -from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageTypeClass, UpstreamClass, @@ -94,11 +96,30 @@ def report_upstream_latency(self, start_time: datetime, end_time: datetime) -> N @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") class DremioSource(StatefulIngestionSourceBase): """ - This plugin extracts the following: - - Metadata for databases, schemas, views and tables - - Column types associated with each table - - Table, row, and column statistics via optional SQL profiling - - Lineage information for views and datasets + This plugin integrates with Dremio to extract and ingest metadata into DataHub. 
+ The following types of metadata are extracted: + + - Metadata for Spaces, Folders, Sources, and Datasets: + - Includes physical and virtual datasets, with detailed information about each dataset. + - Extracts metadata about Dremio's organizational hierarchy: Spaces (top-level), Folders (sub-level), and Sources (external data connections). + + - Schema and Column Information: + - Column types and schema metadata associated with each physical and virtual dataset. + - Extracts column-level metadata, such as names, data types, and descriptions, if available. + + - Lineage Information: + - Dataset-level and column-level lineage tracking: + - Dataset-level lineage shows dependencies and relationships between physical and virtual datasets. + - Column-level lineage tracks transformations applied to individual columns across datasets. + - Lineage information helps trace the flow of data and transformations within Dremio. + + - Ownership and Glossary Terms: + - Metadata related to ownership of datasets, extracted from Dremio’s ownership model. + - Glossary terms and business metadata associated with datasets, providing additional context to the data. + + - Optional SQL Profiling (if enabled): + - Table, row, and column statistics can be profiled and ingested via optional SQL queries. + - Extracts statistics about tables and columns, such as row counts and data distribution, for better insight into the dataset structure. 
""" config: DremioSourceConfig @@ -133,15 +154,6 @@ def __init__(self, config: DremioSourceConfig, ctx: Any): self.reference_source_mapping = DremioToDataHubSourceTypeMapping() self.max_workers = config.max_workers - # Handle stale entity removal - self.stale_entity_removal_handler = StaleEntityRemovalHandler( - source=self, - config=self.config, - state_type_class=GenericCheckpointState, - pipeline_name=self.ctx.pipeline_name, - run_id=self.ctx.run_id, - ) - self.sql_parsing_aggregator = SqlParsingAggregator( platform=make_data_platform_urn(self.get_platform()), platform_instance=self.config.platform_instance, @@ -149,7 +161,7 @@ def __init__(self, config: DremioSourceConfig, ctx: Any): graph=self.ctx.graph, generate_usage_statistics=True, generate_operations=True, - usage_config=BaseUsageConfig(), + usage_config=self.config.usage, ) @classmethod @@ -239,69 +251,51 @@ def _build_source_map(self) -> Dict[str, DremioSourceMapping]: return source_map - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - """ - Generate workunits for Dremio metadata. - """ - - self.source_map = self._build_source_map() - - for wu in self.get_workunits_internal(): - self.report.report_workunit(wu) - yield wu - - # Emit the stale entity removal workunits - yield from self.stale_entity_removal_handler.gen_removed_entity_workunits() + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: """ Internal method to generate workunits for Dremio metadata. 
""" + self.source_map = self._build_source_map() + # Process Containers containers = self.dremio_catalog.get_containers() - with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - future_to_container = { - executor.submit(self.process_container, container): container - for container in containers - } - - for future in as_completed(future_to_container): - container_info = future_to_container[future] - try: - yield from future.result() - logger.info( - f"Dremio container {container_info.container_name} emitted successfully" - ) - except Exception as exc: - self.report.num_containers_failed = +1 - self.report.report_failure( - "Failed to process Dremio container", - f"Failed to process container {'.'.join(container_info.path)}.{container_info.resource_name}: {exc}", - ) + for container in containers: + try: + yield from self.process_container(container) + logger.info( + f"Dremio container {container.container_name} emitted successfully" + ) + except Exception as exc: + self.report.num_containers_failed += 1 # Increment failed containers + self.report.report_failure( + "Failed to process Dremio container", + f"Failed to process container {'.'.join(container.path)}.{container.resource_name}: {exc}", + ) # Process Datasets datasets = self.dremio_catalog.get_datasets() - with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - future_to_dataset = { - executor.submit(self.process_dataset, dataset): dataset - for dataset in datasets - } - - for future in as_completed(future_to_dataset): - dataset_info = future_to_dataset[future] - try: - yield from future.result() - logger.info( - f"Dremio dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name} emitted successfully" - ) - except Exception as exc: - self.report.num_datasets_failed = +1 - self.report.report_failure( - "Failed to process Dremio dataset", - f"Failed to process dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name}: {exc}", - ) + for dataset_info in datasets: + 
try: + yield from self.process_dataset(dataset_info) + logger.info( + f"Dremio dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name} emitted successfully" + ) + except Exception as exc: + self.report.num_datasets_failed += 1 # Increment failed containers + self.report.report_failure( + "Failed to process Dremio dataset", + f"Failed to process dataset {'.'.join(dataset_info.path)}.{dataset_info.resource_name}: {exc}", + ) # Optionally Process Query Lineage if self.config.include_query_lineage: @@ -310,23 +304,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # Process Glossary Terms glossary_terms = self.dremio_catalog.get_glossary_terms() - with ThreadPoolExecutor(max_workers=self.max_workers) as executor: - future_to_glossary_term = { - executor.submit( - self.process_glossary_term, glossary_term - ): glossary_term - for glossary_term in glossary_terms - } - - for future in as_completed(future_to_glossary_term): - glossary_term_info = future_to_glossary_term[future] - try: - yield from future.result() - except Exception as exc: - self.report.report_failure( - "Failed to process Glossary terms", - f"Failed to process glossary term {glossary_term_info.glossary_term}: {exc}", - ) + for glossary_term in glossary_terms: + try: + yield from self.process_glossary_term(glossary_term) + except Exception as exc: + self.report.report_failure( + "Failed to process Glossary terms", + f"Failed to process glossary term {glossary_term.glossary_term}: {exc}", + ) # Generate workunit for aggregated SQL parsing results for mcp in self.sql_parsing_aggregator.gen_metadata(): @@ -342,9 +327,7 @@ def process_container( container_urn = self.dremio_aspects.get_container_urn( path=container_info.path, name=container_info.container_name ) - self.stale_entity_removal_handler.add_entity_to_state( - type="container", urn=container_urn - ) + yield from self.dremio_aspects.populate_container_mcp( container_urn, container_info ) @@ -365,12 +348,6 @@ def 
process_dataset( platform_instance=self.config.platform_instance, ) - # Mark the entity as scanned - self.stale_entity_removal_handler.add_entity_to_state( - type="dataset", - urn=dataset_urn, - ) - for dremio_mcp in self.dremio_aspects.populate_dataset_mcp( dataset_urn, dataset_info ): @@ -436,12 +413,6 @@ def process_glossary_term( Process a Dremio container and generate metadata workunits. """ - glossary_term_urn = glossary_term_info.urn - - self.stale_entity_removal_handler.add_entity_to_state( - type="glossaryTerm", urn=glossary_term_urn - ) - yield from self.dremio_aspects.populate_glossary_term_mcp(glossary_term_info) def generate_view_lineage( @@ -610,7 +581,7 @@ def close(self) -> None: def _sql_dialect(platform: str) -> str: - return "trino" + return "drill" datahub.sql_parsing.sqlglot_utils._get_dialect_str = _sql_dialect