Skip to content

Commit

Permalink
fix(ingest): avoid shell entities during view lineage generation (dat…
Browse files Browse the repository at this point in the history
  • Loading branch information
mayurinehate authored Dec 6, 2024
1 parent 1ed55f4 commit 2fe2132
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 203 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,8 @@ def _run_sql_parser(
)
else:
self.report.num_view_definitions_parsed += 1
if raw_lineage.out_tables != [view_urn]:
self.report.num_view_definitions_view_urn_mismatch += 1
return view_definition_lineage_helper(raw_lineage, view_urn)

def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class SQLSourceReport(
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None

num_view_definitions_parsed: int = 0
num_view_definitions_view_urn_mismatch: int = 0
num_view_definitions_failed_parsing: int = 0
num_view_definitions_failed_column_parsing: int = 0
view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,8 @@ def _run_sql_parser(
)
else:
self.report.num_view_definitions_parsed += 1
if raw_lineage.out_tables != [view_urn]:
self.report.num_view_definitions_view_urn_mismatch += 1
return view_definition_lineage_helper(raw_lineage, view_urn)

def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1243,13 +1243,19 @@ def infer_output_schema(result: SqlParsingResult) -> Optional[List[SchemaFieldCl
def view_definition_lineage_helper(
result: SqlParsingResult, view_urn: str
) -> SqlParsingResult:
if result.query_type is QueryType.SELECT:
if result.query_type is QueryType.SELECT or (
result.out_tables and result.out_tables != [view_urn]
):
# Some platforms (e.g. postgres) store only <select statement> from view definition
# `create view V as <select statement>` . For such view definitions, `result.out_tables` and
# `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream
# details and downstream column details are extracted correctly.
# Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream`
# to get complete lineage result.

# Some platforms(e.g. mssql) may have slightly different view name in view definition than
# actual view name used elsewhere. Therefore we overwrite downstream table for such cases as well.

result.out_tables = [view_urn]
if result.column_lineage:
for col_result in result.column_lineage:
Expand Down
Loading

0 comments on commit 2fe2132

Please sign in to comment.