From 3e4749624f34b177fee95feb1e587d687fe39a56 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Mon, 1 Jul 2024 23:45:51 +0530 Subject: [PATCH] feat(ingest/lookml): ingest field tags (#10792) Co-authored-by: Harshal Sheth --- .../ingestion/source/looker/looker_common.py | 26 +- .../ingestion/source/looker/lookml_source.py | 1 + .../looker/golden_looker_mces.json | 12 - .../looker/golden_test_allow_ingest.json | 6 - ...olden_test_external_project_view_mces.json | 6 - .../looker/golden_test_file_path_ingest.json | 6 - .../golden_test_independent_look_ingest.json | 6 - .../looker/golden_test_ingest.json | 6 - .../looker/golden_test_ingest_joins.json | 6 - .../golden_test_ingest_unaliased_joins.json | 6 - .../looker_mces_golden_deleted_stateful.json | 22 +- .../looker/looker_mces_usage_history.json | 6 - .../duplicate_field_ingestion_golden.json | 21 +- .../integration/lookml/expected_output.json | 9 - .../lookml/field_tag_ingestion_golden.json | 567 ++++++++++++++++++ .../dataset_lineages.view.lkml | 1 + .../lookml/lookml_mces_api_bigquery.json | 9 - .../lookml/lookml_mces_api_hive2.json | 9 - .../lookml/lookml_mces_offline.json | 9 - ...lookml_mces_offline_platform_instance.json | 9 - .../lookml_mces_with_external_urls.json | 9 - .../lookml/refinements_ingestion_golden.json | 9 - .../tests/integration/lookml/test_lookml.py | 26 + 23 files changed, 641 insertions(+), 146 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index b53fe27745fc67..44b6fcdf6d4673 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -244,6 +244,7 @@ class ViewField: view_name: Optional[str] = None is_primary_key: bool = False upstream_fields: List[str] = dataclasses_field(default_factory=list) + tags: List[str] = dataclasses_field(default_factory=list) @dataclass @@ -561,21 +562,30 @@ def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent: @staticmethod def _get_tags_from_field_type( - field_type: ViewFieldType, reporter: SourceReport + field: ViewField, reporter: SourceReport ) -> Optional[GlobalTagsClass]: - if field_type in LookerUtil.type_to_tag_map: - return GlobalTagsClass( - tags=[ + schema_field_tags: List[TagAssociationClass] = [ + TagAssociationClass(tag=builder.make_tag_urn(tag_name)) + for tag_name in field.tags + ] + + if field.field_type in LookerUtil.type_to_tag_map: + schema_field_tags.extend( + [ TagAssociationClass(tag=tag_name) - for tag_name in LookerUtil.type_to_tag_map[field_type] + for tag_name in LookerUtil.type_to_tag_map[field.field_type] ] ) else: reporter.report_warning( "lookml", - f"Failed to map view field type {field_type}. Won't emit tags for it", + f"Failed to map view field type {field.field_type}. Won't emit tags for measure and dimension", ) - return None + + if schema_field_tags: + return GlobalTagsClass(tags=schema_field_tags) + + return None @staticmethod def get_tag_mces() -> Iterable[MetadataChangeEvent]: @@ -602,7 +612,7 @@ def view_field_to_schema_field( else f"{field.field_type.value}. {field.description}" ), globalTags=( - LookerUtil._get_tags_from_field_type(field.field_type, reporter) + LookerUtil._get_tags_from_field_type(field, reporter) if tag_measures_and_dimensions is True else None ), diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 4a872f8b1a025a..0c9b3ae8695cf4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -1090,6 +1090,7 @@ def _get_fields( is_primary_key=is_primary_key, field_type=type_cls, upstream_fields=upstream_fields, + tags=field_dict.get("tags") or [], ) fields.append(field) return fields diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index 9ec1a69c6ffdd0..1ce1b4b4750da7 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -78,9 +78,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -140,9 +137,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } @@ -178,9 +172,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -222,9 +213,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 875d5a7356091b..87430ef1067ffd 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -137,9 +134,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index 3ba4f1fbb5da97..3f11798d0aa5a4 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index be49879f4e263c..ec2c46c5daf27a 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 465244c4922093..bb3c3ccb4e2146 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -324,9 +324,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -386,9 +383,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index ab1c6719609827..a7b8abed02da31 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index 0f55e26a2baab6..a1a7747c741a6b 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index fcad66ce61f23b..4d8e2e79eafadb 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -137,9 +134,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index 15f1758aa4b83a..e3cbf43059c7c7 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -324,9 +324,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -386,9 +383,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -813,8 +807,8 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:621eb6e00da9abece0f64522f81be0e7", + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -830,8 +824,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -847,8 +841,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:621eb6e00da9abece0f64522f81be0e7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -864,8 +858,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index e94089d9b4ea4e..f3da2dbfac2f02 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } @@ -113,9 +110,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } diff --git a/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json index b06b59ba436541..149610768af510 100644 --- a/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json @@ -116,7 +116,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n tags: [\"softVersion\"]\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", "viewLanguage": "lookml" } }, @@ -338,6 +338,9 @@ "recursive": false, "globalTags": { "tags": [ + { + "tag": "urn:li:tag:softVersion" + }, { "tag": "urn:li:tag:Dimension" } @@ -484,5 +487,21 @@ "runId": "lookml-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:softVersion", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "softVersion" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json index 931830eecf0c3e..1a789af60a8550 100644 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ b/metadata-ingestion/tests/integration/lookml/expected_output.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json new file mode 100644 index 00000000000000..fdd37139880bdc --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json @@ -0,0 +1,567 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n tags: [\"softVersion\"]\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),entity)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),entity)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),metadata)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),metadata)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),urn)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),version)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),version)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),createdon)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),createdon)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "dataset_lineages", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "entity", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "metadata", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "urn", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "version", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:softVersion" + }, + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "createdon", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "time", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + }, + { + "tag": "urn:li:tag:Temporal" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "dataset_lineages.view.lkml", + "looker.model": "data" + }, + "name": "dataset_lineages", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Dimension", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Dimension", + "description": "A tag that is applied to all dimension fields." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Temporal", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Temporal", + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Measure", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Measure", + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:softVersion", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "softVersion" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml index 6062993f320d33..c29c47719810a8 100644 --- a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml +++ b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml @@ -40,6 +40,7 @@ view: dataset_lineages { dimension: version { type: number + tags: ["softVersion"] sql: ${TABLE}."VERSION" ;; } diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index a846e2ca84b092..05c950f9e10516 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 47d536fb824099..23384d6070d202 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index caefb7b9bcce2f..579a984b88243a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index c0cec6c2610100..d1487a62e95a88 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 31aec97293e5a9..d7793fbed8ef0c 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -1395,9 +1395,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1412,9 +1409,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1429,9 +1423,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json index 16e901125e4517..25d6511d172a64 100644 --- a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json @@ -1409,9 +1409,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1426,9 +1423,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1443,9 +1437,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 1c1f0fec3eebb9..1099a29ba3b8c8 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -868,6 +868,7 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None: @freeze_time(FROZEN_TIME) def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" mce_out_file = "duplicate_ingest_mces_output.json" @@ -887,3 +888,28 @@ def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time): output_path=tmp_path / mce_out_file, golden_path=golden_path, ) + + +@freeze_time(FROZEN_TIME) +def test_field_tag_ingest(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "field_tag_mces_output.json" + + new_recipe = get_default_recipe( + f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/lkml_samples_duplicate_field", + ) + + new_recipe["source"]["config"]["tag_measures_and_dimensions"] = True + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status(raise_warnings=True) + + golden_path = test_resources_dir / "field_tag_ingestion_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + )