From 4489cd5f184bd7ddae2898c01436cd664601fba2 Mon Sep 17 00:00:00 2001 From: GitHub Action Website Snapshot <> Date: Wed, 30 Oct 2024 09:20:36 +0000 Subject: [PATCH] Refreshing website content from main repo. Source commit: https://github.com/OpenLineage/OpenLineage/commit/50ba05b3466ffa082e50ea3ff645e154e489195e --- .../spark/spark_column_lineage.md | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/docs/integrations/spark/spark_column_lineage.md b/docs/integrations/spark/spark_column_lineage.md index 05b811f..5e53a76 100644 --- a/docs/integrations/spark/spark_column_lineage.md +++ b/docs/integrations/spark/spark_column_lineage.md @@ -92,46 +92,3 @@ To unravel two dependencies implement following logic: The inputs are also mapped for all dataset dependencies. The result is added to each output. Finally, the list of outputs with all their inputs is mapped to `ColumnLineageDatasetFacetFields` object. - -## Writing custom extensions - -Spark framework is known for its great ability to be extended by custom libraries capable of reading or writing to anything. In case of having a custom implementation, we prepared an ability to extend column-level lineage implementation to be able to retrieve information from other input or output LogicalPlan nodes. - -Creating such an extension requires implementing a following interface: - -``` -/** Interface for implementing custom collectors of column-level lineage. */ -interface CustomColumnLineageVisitor { - - /** - * Collect inputs for a given {@link LogicalPlan}. Column-level lineage mechanism traverses - * LogicalPlan on its node. This method will be called for each traversed node. Input information - * should be put into builder. - * - * @param node - * @param builder - */ - void collectInputs(LogicalPlan node, ColumnLevelLineageBuilder builder); - - /** - * Collect outputs for a given {@link LogicalPlan}. Column-level lineage mechanism traverses - * LogicalPlan on its node. This method will be called for each traversed node. Output information - * should be put into builder. - * - * @param node - * @param builder - */ - void collectOutputs(LogicalPlan node, ColumnLevelLineageBuilder builder); - - /** - * Collect expressions for a given {@link LogicalPlan}. Column-level lineage mechanism traverses - * LogicalPlan on its node. This method will be called for each traversed node. Expression - * dependency information should be put into builder. - * - * @param node - * @param builder - */ - void collectExpressionDependencies(LogicalPlan node, ColumnLevelLineageBuilder builder); -} -``` -and making it available for Service Loader (implementation class name has to be put in a resource file `META-INF/services/io.openlineage.spark.agent.lifecycle.plan.column.CustomColumnLineageVisitor`).