diff --git a/blog/openlineage-spark/index.mdx b/blog/openlineage-spark/index.mdx index ee121b4..b46eb60 100644 --- a/blog/openlineage-spark/index.mdx +++ b/blog/openlineage-spark/index.mdx @@ -51,7 +51,7 @@ familiar with it and how it's used in Spark applications. OpenLineage integrates interface and collecting information about jobs that are executed inside a Spark application. To activate the listener, add the following properties to your Spark configuration: ``` -spark.jars.packages io.openlineage:openlineage-spark:1.23.0 +spark.jars.packages io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}} spark.extraListeners io.openlineage.spark.agent.OpenLineageSparkListener ``` This can be added to your cluster’s `spark-defaults.conf` file, in which case it will record lineage for every job executed on the cluster, or added to specific jobs on submission via the `spark-submit` command. Once the listener is activated, it needs to know where to report lineage events, as well as the namespace of your jobs. Add the following additional configuration lines to your `spark-defaults.conf` file or your Spark submission script: @@ -122,7 +122,7 @@ spark = (SparkSession.builder.master('local').appName('openlineage_spark_test') .config('spark.jars', ",".join(files)) # Install and set up the OpenLineage listener - .config('spark.jars.packages', 'io.openlineage:openlineage-spark:1.23.0) + .config('spark.jars.packages', 'io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}}) .config('spark.extraListeners', 'io.openlineage.spark.agent.OpenLineageSparkListener') .config('spark.openlineage.transport.url', 'http://marquez-api:5000') .config('spark.openlineage.transport.type', 'http') diff --git a/docs/client/java/java.md b/docs/client/java/java.md index 3792e0b..cff5f99 100644 --- a/docs/client/java/java.md +++ b/docs/client/java/java.md @@ -24,14 +24,14 @@ Maven: io.openlineage openlineage-java - ${OPENLINEAGE_VERSION} + {{PREPROCESSOR:OPENLINEAGE_VERSION}} ``` or Gradle: ```groovy -implementation("io.openlineage:openlineage-java:${OPENLINEAGE_VERSION}") +implementation("io.openlineage:openlineage-java:{{PREPROCESSOR:OPENLINEAGE_VERSION}}") ``` For more information on the available versions of the `openlineage-java`, diff --git a/docs/client/java/partials/s3_transport.md b/docs/client/java/partials/s3_transport.md index c35ab00..99e6a34 100644 --- a/docs/client/java/partials/s3_transport.md +++ b/docs/client/java/partials/s3_transport.md @@ -15,7 +15,7 @@ to be emitted correctly. io.openlineage transports-s3 - YOUR_VERSION_HERE + {{PREPROCESSOR:OPENLINEAGE_VERSION}} ``` diff --git a/docs/development/developing/python/api-reference/openlineage.client.md b/docs/development/developing/python/api-reference/openlineage.client.md index 48da3ac..514d94f 100644 --- a/docs/development/developing/python/api-reference/openlineage.client.md +++ b/docs/development/developing/python/api-reference/openlineage.client.md @@ -4,5 +4,5 @@ title: Python Client -
\n
\n
\n On this page\n
\n
\n\n
\n
\n\n
\n

openlineage.client.client module

\n
\n
\nclass openlineage.client.client.OpenLineageClientOptions(timeout=5.0, verify=True, api_key=None, adapter=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • timeout (float)

  • \n
  • verify (bool)

  • \n
  • api_key (Optional[str])

  • \n
  • adapter (Optional[HTTPAdapter])

  • \n
\n
\n
\n
\n
\ntimeout: float
\n
\n
\n
\nverify: bool
\n
\n
\n
\napi_key: str
\n
\n
\n
\nadapter: HTTPAdapter
\n
\n
\n
\n
\nclass openlineage.client.client.OpenLineageConfig(transport=_Nothing.NOTHING, facets=_Nothing.NOTHING, filters=_Nothing.NOTHING)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • transport (dict[str, Any] | None)

  • \n
  • facets (FacetsConfig)

  • \n
  • filters (list[FilterConfig])

  • \n
\n
\n
\n
\n
\ntransport: dict[str, Any] | None
\n
\n
\n
\nfacets: FacetsConfig
\n
\n
\n
\nfilters: list[FilterConfig]
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

OpenLineageConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.client.OpenLineageClient(url=None, options=None, session=None, transport=None, factory=None, *, config=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • url (str | None)

  • \n
  • options (OpenLineageClientOptions | None)

  • \n
  • session (Session | None)

  • \n
  • transport (Transport | None)

  • \n
  • factory (TransportFactory | None)

  • \n
  • config (dict[str, str] | None)

  • \n
\n
\n
\n
\n
\nDYNAMIC_ENV_VARS_PREFIX = 'OPENLINEAGE__'
\n
\n
\n
\nDEFAULT_URL_TRANSPORT_NAME = 'default_http'
\n
\n
\n
\nclassmethod from_environment()
\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\nclassmethod from_dict(config)
\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\nfilter_event(event)
\n

Filters jobs according to config-defined events

\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

Event | None

\n
\n
\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproperty config: OpenLineageConfig
\n

Retrieves the OpenLineage configuration.

\n

This property method returns the content of the OpenLineage YAML config file.\nThe configuration is determined by merging sources in the following order of precedence:\n1. User-defined configuration passed to the client constructor.\n2. YAML config file located in one of the following paths:\n- Path specified by the OPENLINEAGE_CONFIG environment variable.\n- Current working directory.\n- $HOME/.openlineage.\n3. Environment variables with the OPENLINEAGE__ prefix.\nIf the configuration is not already loaded, it will be constructed by merging the above sources.\nIn case of a TypeError during the parsing of the configuration, a ValueError will be raised indicating\nthat the structure of the config does not match the expected format.

\n
\n
\n
\nadd_environment_facets(event)
\n

Adds environment variables as facets to the event object.

\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent]

\n
\n
\n
\n
\n
\n
\n

openlineage.client.event_v2 module

\n
\n
\nclass openlineage.client.event_v2.BaseEvent(*, eventTime, producer='')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\neventTime: str
\n

the time the event occurred at

\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\neventtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproducer_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nschemaurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.event_v2.RunEvent(*, eventTime, producer='', run, job, eventType=None, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • eventType (EventType | None)

  • \n
  • inputs (list[InputDataset] | None)

  • \n
  • outputs (list[OutputDataset] | None)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\neventType: EventType | None
\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.JobEvent(*, eventTime, producer='', job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
\n
\n
\n
\n
\njob: Job
\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.DatasetEvent(*, eventTime, producer='', dataset)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • dataset (StaticDataset)

  • \n
\n
\n
\n
\n
\ndataset: StaticDataset
\n
\n
\n
\n
\nopenlineage.client.event_v2.RunState
\n

alias of EventType

\n
\n
\n
\nclass openlineage.client.event_v2.Dataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that dataset

\n
\n
\n
\nname: str
\n

The unique name for that dataset within that namespace

\n
\n
\n
\nfacets: dict[str, DatasetFacet] | None
\n

The facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.event_v2.InputDataset(namespace, name, inputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An input dataset

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ninputFacets: dict[str, InputDatasetFacet] | None
\n

The input facets for this dataset.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.OutputDataset(namespace, name, outputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An output dataset

\n
\n
Parameters:
\n
\n
\n
\n
\n
\noutputFacets: dict[str, OutputDatasetFacet] | None
\n

The output facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.event_v2.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (dict[str, RunFacet] | None)

  • \n
\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nfacets: dict[str, RunFacet] | None
\n

The run facets.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.event_v2.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, JobFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\nfacets: dict[str, JobFacet] | None
\n

The job facets.

\n
\n
\n
\n
\nopenlineage.client.event_v2.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n

openlineage.client.facet module

\n
\n
\nopenlineage.client.facet.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.facet.BaseFacet
\n

Bases: RedactMixin

\n
\n
\n
\n
\nproperty skip_redact: List[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.NominalTimeRunFacet(nominalStartTime, nominalEndTime=None)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • nominalStartTime (str)

  • \n
  • nominalEndTime (Optional[str])

  • \n
\n
\n
\n
\n
\nnominalStartTime: str
\n
\n
\n
\nnominalEndTime: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.ParentRunFacet(run, job)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • run (Dict[Any, Any])

  • \n
  • job (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nrun: Dict[Any, Any]
\n
\n
\n
\njob: Dict[Any, Any]
\n
\n
\n
\nclassmethod create(runId, namespace, name)
\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
Return type:
\n

ParentRunFacet

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet.DocumentationJobFacet(description)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

description (str)

\n
\n
\n
\n
\ndescription: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SourceCodeLocationJobFacet(type, url)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • url (str)

  • \n
\n
\n
\n
\n
\ntype: str
\n
\n
\n
\nurl: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SqlJobFacet(query)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

query (str)

\n
\n
\n
\n
\nquery: str
\n
\n
\n
\n
\nclass openlineage.client.facet.DocumentationDatasetFacet(description)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

description (str)

\n
\n
\n
\n
\ndescription: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SchemaField(name, type, description=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str)

  • \n
  • description (Optional[str])

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\ndescription: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.SchemaDatasetFacet(fields)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

fields (List[SchemaField])

\n
\n
\n
\n
\nfields: List[SchemaField]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataSourceDatasetFacet(name, uri)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • uri (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nuri: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OutputStatisticsOutputDatasetFacet(rowCount=None, size=None, fileCount=None)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (Optional[int])

  • \n
  • size (Optional[int])

  • \n
  • fileCount (Optional[int])

  • \n
\n
\n
\n
\n
\nrowCount: Optional[int]
\n
\n
\n
\nsize: Optional[int]
\n
\n
\n
\nfileCount: Optional[int]
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnMetric(nullCount=None, distinctCount=None, sum=None, count=None, min=None, max=None, quantiles=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • nullCount (Optional[int])

  • \n
  • distinctCount (Optional[int])

  • \n
  • sum (Optional[int])

  • \n
  • count (Optional[int])

  • \n
  • min (Optional[float])

  • \n
  • max (Optional[float])

  • \n
  • quantiles (Optional[Dict[str, float]])

  • \n
\n
\n
\n
\n
\nnullCount: Optional[int]
\n
\n
\n
\ndistinctCount: Optional[int]
\n
\n
\n
\nsum: Optional[int]
\n
\n
\n
\ncount: Optional[int]
\n
\n
\n
\nmin: Optional[float]
\n
\n
\n
\nmax: Optional[float]
\n
\n
\n
\nquantiles: Optional[Dict[str, float]]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataQualityMetricsInputDatasetFacet(rowCount=None, bytes=None, fileCount=None, columnMetrics=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (Optional[int])

  • \n
  • bytes (Optional[int])

  • \n
  • fileCount (Optional[int])

  • \n
  • columnMetrics (Dict[str, ColumnMetric])

  • \n
\n
\n
\n
\n
\nrowCount: Optional[int]
\n
\n
\n
\nbytes: Optional[int]
\n
\n
\n
\nfileCount: Optional[int]
\n
\n
\n
\ncolumnMetrics: Dict[str, ColumnMetric]
\n
\n
\n
\n
\nclass openlineage.client.facet.Assertion(assertion, success, column=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • assertion (str)

  • \n
  • success (bool)

  • \n
  • column (Optional[str])

  • \n
\n
\n
\n
\n
\nassertion: str
\n
\n
\n
\nsuccess: bool
\n
\n
\n
\ncolumn: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataQualityAssertionsDatasetFacet(assertions)
\n

Bases: BaseFacet

\n

This facet represents asserted expectations on dataset or it\u2019s column.

\n
\n
Parameters:
\n

assertions (List[Assertion])

\n
\n
\n
\n
\nassertions: List[Assertion]
\n
\n
\n
\n
\nclass openlineage.client.facet.SourceCodeJobFacet(language, source)
\n

Bases: BaseFacet

\n

This facet represents source code that the job executed.

\n
\n
Parameters:
\n
    \n
  • language (str)

  • \n
  • source (str)

  • \n
\n
\n
\n
\n
\nlanguage: str
\n
\n
\n
\nsource: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ExternalQueryRunFacet(externalQueryId, source)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • externalQueryId (str)

  • \n
  • source (str)

  • \n
\n
\n
\n
\n
\nexternalQueryId: str
\n
\n
\n
\nsource: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ErrorMessageRunFacet(message, programmingLanguage, stackTrace=None)
\n

Bases: BaseFacet

\n

This facet represents an error message that was the result of a job run.

\n
\n
Parameters:
\n
    \n
  • message (str)

  • \n
  • programmingLanguage (str)

  • \n
  • stackTrace (Optional[str])

  • \n
\n
\n
\n
\n
\nmessage: str
\n
\n
\n
\nprogrammingLanguage: str
\n
\n
\n
\nstackTrace: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.SymlinksDatasetFacetIdentifiers(namespace, name, type)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • type (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SymlinksDatasetFacet(identifiers=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents dataset symlink names.

\n
\n
Parameters:
\n

identifiers (List[SymlinksDatasetFacetIdentifiers])

\n
\n
\n
\n
\nidentifiers: List[SymlinksDatasetFacetIdentifiers]
\n
\n
\n
\n
\nclass openlineage.client.facet.StorageDatasetFacet(storageLayer, fileFormat)
\n

Bases: BaseFacet

\n

This facet represents dataset symlink names.

\n
\n
Parameters:
\n
    \n
  • storageLayer (str)

  • \n
  • fileFormat (str)

  • \n
\n
\n
\n
\n
\nstorageLayer: str
\n
\n
\n
\nfileFormat: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipJobFacetOwners(name, type=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (Optional[str])

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipJobFacet(owners=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents ownership of a job.

\n
\n
Parameters:
\n

owners (List[OwnershipJobFacetOwners])

\n
\n
\n
\n
\nowners: List[OwnershipJobFacetOwners]
\n
\n
\n
\n
\nclass openlineage.client.facet.JobTypeJobFacet(processingType, integration, jobType)
\n

Bases: BaseFacet

\n

This facet represents job type properties.

\n
\n
Parameters:
\n
    \n
  • processingType (str)

  • \n
  • integration (str)

  • \n
  • jobType (str)

  • \n
\n
\n
\n
\n
\nprocessingType: str
\n
\n
\n
\nintegration: str
\n
\n
\n
\njobType: str
\n
\n
\n
\n
\nclass openlineage.client.facet.DatasetVersionDatasetFacet(datasetVersion)
\n

Bases: BaseFacet

\n

This facet represents version of a dataset.

\n
\n
Parameters:
\n

datasetVersion (str)

\n
\n
\n
\n
\ndatasetVersion: str
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChange(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nALTER = 'ALTER'
\n
\n
\n
\nCREATE = 'CREATE'
\n
\n
\n
\nDROP = 'DROP'
\n
\n
\n
\nOVERWRITE = 'OVERWRITE'
\n
\n
\n
\nRENAME = 'RENAME'
\n
\n
\n
\nTRUNCATE = 'TRUNCATE'
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChangeDatasetFacetPreviousIdentifier(name, namespace)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • namespace (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nnamespace: str
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChangeDatasetFacet(lifecycleStateChange, previousIdentifier)
\n

Bases: BaseFacet

\n

This facet represents information of lifecycle changes of a dataset.

\n
\n
Parameters:
\n
\n
\n
\n
\n
\nlifecycleStateChange: LifecycleStateChange
\n
\n
\n
\npreviousIdentifier: LifecycleStateChangeDatasetFacetPreviousIdentifier
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipDatasetFacetOwners(name, type)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipDatasetFacet(owners=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents ownership of a dataset.

\n
\n
Parameters:
\n

owners (List[OwnershipDatasetFacetOwners])

\n
\n
\n
\n
\nowners: List[OwnershipDatasetFacetOwners]
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacetFieldsAdditionalInputFields(namespace, name, field)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • field (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfield: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacetFieldsAdditional(inputFields, transformationDescription, transformationType)
\n

Bases: object

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ninputFields: ClassVar[List[ColumnLineageDatasetFacetFieldsAdditionalInputFields]]
\n
\n
\n
\ntransformationDescription: str
\n
\n
\n
\ntransformationType: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacet(fields=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet contains column lineage of a dataset.

\n
\n
Parameters:
\n

fields (Dict[str, ColumnLineageDatasetFacetFieldsAdditional])

\n
\n
\n
\n
\nfields: Dict[str, ColumnLineageDatasetFacetFieldsAdditional]
\n
\n
\n
\n
\nclass openlineage.client.facet.ProcessingEngineRunFacet(version, name, openlineageAdapterVersion)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • version (str)

  • \n
  • name (str)

  • \n
  • openlineageAdapterVersion (str)

  • \n
\n
\n
\n
\n
\nversion: str
\n
\n
\n
\nname: str
\n
\n
\n
\nopenlineageAdapterVersion: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ExtractionError(errorMessage, stackTrace, task, taskNumber)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • errorMessage (str)

  • \n
  • stackTrace (Optional[str])

  • \n
  • task (Optional[str])

  • \n
  • taskNumber (Optional[int])

  • \n
\n
\n
\n
\n
\nerrorMessage: str
\n
\n
\n
\nstackTrace: Optional[str]
\n
\n
\n
\ntask: Optional[str]
\n
\n
\n
\ntaskNumber: Optional[int]
\n
\n
\n
\n
\nclass openlineage.client.facet.ExtractionErrorRunFacet(totalTasks, failedTasks, errors)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ntotalTasks: int
\n
\n
\n
\nfailedTasks: int
\n
\n
\n
\nerrors: List[ExtractionError]
\n
\n
\n
\n
\n

openlineage.client.facet_v2 module

\n
\n
\nclass openlineage.client.facet_v2.BaseFacet(*, producer='')
\n

Bases: RedactMixin

\n

all fields of the base facet are prefixed with _ to avoid name conflicts in facets

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.DatasetFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Dataset Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.InputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Input Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.JobFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Job Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.OutputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Output Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.RunFacet(*, producer='')
\n

Bases: BaseFacet

\n

A Run Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nopenlineage.client.facet_v2.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n

openlineage.client.filter module

\n
\n
\nclass openlineage.client.filter.FilterConfig(type=None, match=None, regex=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • type (str | None)

  • \n
  • match (str | None)

  • \n
  • regex (str | None)

  • \n
\n
\n
\n
\n
\ntype: str | None
\n
\n
\n
\nmatch: str | None
\n
\n
\n
\nregex: str | None
\n
\n
\n
\n
\nclass openlineage.client.filter.Filter
\n

Bases: object

\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.filter.ExactMatchFilter(match)
\n

Bases: Filter

\n
\n
Parameters:
\n

match (str)

\n
\n
\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.filter.RegexFilter(regex)
\n

Bases: Filter

\n
\n
Parameters:
\n

regex (str)

\n
\n
\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nopenlineage.client.filter.create_filter(conf)
\n
\n
Parameters:
\n

conf (FilterConfig)

\n
\n
Return type:
\n

Filter | None

\n
\n
\n
\n
\n
\n

openlineage.client.run module

\n
\n
\nclass openlineage.client.run.RunState(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nSTART = 'START'
\n
\n
\n
\nRUNNING = 'RUNNING'
\n
\n
\n
\nCOMPLETE = 'COMPLETE'
\n
\n
\n
\nABORT = 'ABORT'
\n
\n
\n
\nFAIL = 'FAIL'
\n
\n
\n
\nOTHER = 'OTHER'
\n
\n
\n
\n
\nclass openlineage.client.run.Dataset(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.InputDataset(namespace, name, facets=_Nothing.NOTHING, inputFacets=_Nothing.NOTHING)
\n

Bases: Dataset

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
  • inputFacets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\ninputFacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.OutputDataset(namespace, name, facets=_Nothing.NOTHING, outputFacets=_Nothing.NOTHING)
\n

Bases: Dataset

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
  • outputFacets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\noutputFacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.DatasetEvent(eventTime, producer, schemaURL, dataset)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • schemaURL (str)

  • \n
  • dataset (Dataset)

  • \n
\n
\n
\n
\n
\neventTime: str
\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\ndataset: Dataset
\n
\n
\n
\n
\nclass openlineage.client.run.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.JobEvent(eventTime, producer, schemaURL, job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • schemaURL (str)

  • \n
  • job (Job)

  • \n
  • inputs (Optional[List[Dataset]])

  • \n
  • outputs (Optional[List[Dataset]])

  • \n
\n
\n
\n
\n
\neventTime: str
\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\njob: Job
\n
\n
\n
\ninputs: Optional[List[Dataset]]
\n
\n
\n
\noutputs: Optional[List[Dataset]]
\n
\n
\n
\n
\nclass openlineage.client.run.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nrunId: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\ncheck(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.run.RunEvent(eventType, eventTime, run, job, producer, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING, schemaURL='https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventType (RunState)

  • \n
  • eventTime (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • producer (str)

  • \n
  • inputs (Optional[List[Dataset]])

  • \n
  • outputs (Optional[List[Dataset]])

  • \n
  • schemaURL (str)

  • \n
\n
\n
\n
\n
\neventType: RunState
\n
\n
\n
\neventTime: str
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\nproducer: str
\n
\n
\n
\ninputs: Optional[List[Dataset]]
\n
\n
\n
\noutputs: Optional[List[Dataset]]
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\ncheck(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.serde module

\n
\n
\nclass openlineage.client.serde.Serde
\n

Bases: object

\n
\n
\nclassmethod remove_nulls_and_enums(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

Any

\n
\n
\n
\n
\n
\nclassmethod to_dict(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

dict[Any, Any]

\n
\n
\n
\n
\n
\nclassmethod to_json(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

str

\n
\n
\n
\n
\n
\n
\n

openlineage.client.utils module

\n
\n
\nopenlineage.client.utils.import_from_string(path)
\n
\n
Parameters:
\n

path (str)

\n
\n
Return type:
\n

type[Any]

\n
\n
\n
\n
\n
\nopenlineage.client.utils.try_import_from_string(path)
\n
\n
Parameters:
\n

path (str)

\n
\n
Return type:
\n

type[Any] | None

\n
\n
\n
\n
\n
\nopenlineage.client.utils.get_only_specified_fields(clazz, params)
\n
\n
Parameters:
\n
    \n
  • clazz (type[Any])

  • \n
  • params (dict[str, Any])

  • \n
\n
\n
Return type:
\n

dict[str, Any]

\n
\n
\n
\n
\n
\nopenlineage.client.utils.deep_merge_dicts(dict1, dict2)
\n

Deep merges two dictionaries.

\n

This function merges two dictionaries while handling nested dictionaries.\nFor keys that exist in both dictionaries, the values from dict2 take precedence.\nIf a key exists in both dictionaries and the values are dictionaries themselves,\nthey are merged recursively.\nThis function merges only dictionaries. If key is of different type, e.g. list\nit does not work properly.

\n
\n
Parameters:
\n
    \n
  • dict1 (dict[Any, Any])

  • \n
  • dict2 (dict[Any, Any])

  • \n
\n
\n
Return type:
\n

dict[Any, Any]

\n
\n
\n
\n
\n
\nclass openlineage.client.utils.RedactMixin
\n

Bases: object

\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\n
\n

openlineage.client.uuid module

\n
\n
\nopenlineage.client.uuid.generate_new_uuid(instant=None)
\n

Generate new UUID for an instant of time. Each function call returns a new UUID value.

\n

UUID version is an implementation detail, and should not be relied on.\nFor now it is [UUIDv7](https://datatracker.ietf.org/doc/rfc9562/), so for increasing instant values,\nreturned UUID is always greater than previous one.

\n

Using uuid6 lib implementation (MIT License), with few changes:\n* oittaa/uuid6-python\n* oittaa/uuid6-python

\n

Added in v1.15.0

\n
\n
Parameters:
\n

instant (datetime | None) \u2013 instant of time used to generate UUID. If not provided, current time is used.

\n
\n
Return type:
\n

UUID

\n
\n
Returns:
\n

UUID

\n
\n
\n
\n
\n
\nopenlineage.client.uuid.generate_static_uuid(instant, data)
\n

Generate UUID for instant of time and input data.\nCalling function with same arguments always produces the same result.

\n

UUID version is an implementation detail, and **should not* be relied on.\nFor now it is [UUIDv7](https://datatracker.ietf.org/doc/rfc9562/), so for increasing instant values,\nreturned UUID is always greater than previous one. The only difference from RFC 9562 is that\nleast significant bytes are not random, but instead a SHA-1 hash of input data.

\n

Using uuid6 lib implementation (MIT License), with few changes:\n* oittaa/uuid6-python\n* oittaa/uuid6-python

\n

Added in v1.15.0

\n
\n
Parameters:
\n
    \n
  • instant (datetime) \u2013 instant of time used to generate UUID. If not provided, current time is used.

  • \n
  • data (bytes) \u2013 input data to generate random part from.

  • \n
\n
\n
Return type:
\n

UUID

\n
\n
Returns:
\n

UUID

\n
\n
\n
\n
\n
\n

openlineage.client.generated.base module

\n
\n
\nopenlineage.client.generated.base.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.BaseEvent(*, eventTime, producer='')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\neventTime: str
\n

the time the event occurred at

\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\nproperty skip_redact
\n
\n
\n
\neventtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproducer_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nschemaurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.BaseFacet(*, producer='')
\n

Bases: RedactMixin

\n

all fields of the base facet are prefixed with _ to avoid name conflicts in facets

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\nproperty skip_redact
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Dataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that dataset

\n
\n
\n
\nname: str
\n

The unique name for that dataset within that namespace

\n
\n
\n
\nfacets: dict[str, DatasetFacet] | None
\n

The facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.generated.base.DatasetEvent(*, eventTime, producer='', dataset)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • dataset (StaticDataset)

  • \n
\n
\n
\n
\n
\ndataset: StaticDataset
\n
\n
\n
\n
\nclass openlineage.client.generated.base.DatasetFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Dataset Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.EventType(value)
\n

Bases: Enum

\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\nSTART = 'START'
\n
\n
\n
\nRUNNING = 'RUNNING'
\n
\n
\n
\nCOMPLETE = 'COMPLETE'
\n
\n
\n
\nABORT = 'ABORT'
\n
\n
\n
\nFAIL = 'FAIL'
\n
\n
\n
\nOTHER = 'OTHER'
\n
\n
\n
\n
\nclass openlineage.client.generated.base.InputDataset(namespace, name, inputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An input dataset

\n
\n
\ninputFacets: dict[str, InputDatasetFacet] | None
\n

The input facets for this dataset.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.InputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Input Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, JobFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\nfacets: dict[str, JobFacet] | None
\n

The job facets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.JobEvent(*, eventTime, producer='', job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
\njob: Job
\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.JobFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Job Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.OutputDataset(namespace, name, outputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An output dataset

\n
\n
\noutputFacets: dict[str, OutputDatasetFacet] | None
\n

The output facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.generated.base.OutputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Output Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (dict[str, RunFacet] | None)

  • \n
\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nfacets: dict[str, RunFacet] | None
\n

The run facets.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.RunEvent(*, eventTime, producer='', run, job, eventType=None, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • eventType (EventType | None)

  • \n
  • inputs (list[InputDataset] | None)

  • \n
  • outputs (list[OutputDataset] | None)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\neventType: EventType | None
\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.RunFacet(*, producer='')
\n

Bases: BaseFacet

\n

A Run Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.StaticDataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

A Dataset sent within static metadata events

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.column_lineage_dataset module

\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.ColumnLineageDatasetFacet(fields, dataset=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • fields (dict[str, Fields])

  • \n
  • dataset (list[InputField] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nfields: dict[str, Fields]
\n

Column level lineage that maps output fields into input fields used to evaluate them.

\n
\n
\n
\ndataset: list[InputField] | None
\n

Column level lineage that affects the whole dataset. This includes filtering, sorting, grouping\n(aggregates), joining, window functions, etc.

\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.Fields(inputFields, transformationDescription=None, transformationType=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • inputFields (list[InputField])

  • \n
  • transformationDescription (str | None)

  • \n
  • transformationType (str | None)

  • \n
\n
\n
\n
\n
\ninputFields: list[InputField]
\n
\n
\n
\ntransformationDescription: str | None
\n

a string representation of the transformation applied

\n
\n
\n
\ntransformationType: str | None
\n

no\noriginal data available (like a hash of PII for example)

\n
\n
Type:
\n

IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY

\n
\n
Type:
\n

exact same as input; MASKED

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.InputField(namespace, name, field, transformations=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n

Represents a single dependency on some field (column).

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • field (str)

  • \n
  • transformations (list[Transformation] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The input dataset namespace

\n
\n
\n
\nname: str
\n

The input dataset name

\n
\n
\n
\nfield: str
\n

The input field

\n
\n
\n
\ntransformations: list[Transformation] | None
\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.Transformation(type, subtype=None, description=None, masking=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • subtype (str | None)

  • \n
  • description (str | None)

  • \n
  • masking (bool | None)

  • \n
\n
\n
\n
\n
\ntype: str
\n

DIRECT, INDIRECT

\n
\n
Type:
\n

The type of the transformation. Allowed values are

\n
\n
\n
\n
\n
\nsubtype: str | None
\n

The subtype of the transformation

\n
\n
\n
\ndescription: str | None
\n

a string representation of the transformation applied

\n
\n
\n
\nmasking: bool | None
\n

is transformation masking the data or not

\n
\n
\n
\n
\n

openlineage.client.generated.data_quality_assertions_dataset module

\n
\n
\nclass openlineage.client.generated.data_quality_assertions_dataset.Assertion(assertion, success, column=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • assertion (str)

  • \n
  • success (bool)

  • \n
  • column (str | None)

  • \n
\n
\n
\n
\n
\nassertion: str
\n

Type of expectation test that dataset is subjected to

\n
\n
\n
\nsuccess: bool
\n
\n
\n
\ncolumn: str | None
\n

Column that expectation is testing. It should match the name provided in SchemaDatasetFacet. If\ncolumn field is empty, then expectation refers to whole dataset.

\n
\n
\n
\n
\nclass openlineage.client.generated.data_quality_assertions_dataset.DataQualityAssertionsDatasetFacet(assertions, *, producer='')
\n

Bases: InputDatasetFacet

\n

list of tests performed on dataset or dataset columns, and their results

\n
\n
Parameters:
\n
    \n
  • assertions (list[Assertion])

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nassertions: list[Assertion]
\n
\n
\n
\n
\n

openlineage.client.generated.data_quality_metrics_input_dataset module

\n
\n
\nclass openlineage.client.generated.data_quality_metrics_input_dataset.ColumnMetrics(nullCount=None, distinctCount=None, sum=None, count=None, min=None, max=None, quantiles=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • nullCount (int | None)

  • \n
  • distinctCount (int | None)

  • \n
  • sum (float | None)

  • \n
  • count (float | None)

  • \n
  • min (float | None)

  • \n
  • max (float | None)

  • \n
  • quantiles (dict[str, float] | None)

  • \n
\n
\n
\n
\n
\nnullCount: int | None
\n

The number of null values in this column for the rows evaluated

\n
\n
\n
\ndistinctCount: int | None
\n

The number of distinct values in this column for the rows evaluated

\n
\n
\n
\nsum: float | None
\n

The total sum of values in this column for the rows evaluated

\n
\n
\n
\ncount: float | None
\n

The number of values in this column

\n
\n
\n
\nmin: float | None
\n
\n
\n
\nmax: float | None
\n
\n
\n
\nquantiles: dict[str, float] | None
\n

0.1 0.25 0.5 0.75 1

\n
\n
Type:
\n

The property key is the quantile. Examples

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.data_quality_metrics_input_dataset.DataQualityMetricsInputDatasetFacet(columnMetrics, rowCount=None, bytes=None, fileCount=None, *, producer='')
\n

Bases: InputDatasetFacet

\n
\n
Parameters:
\n
    \n
  • columnMetrics (dict[str, ColumnMetrics])

  • \n
  • rowCount (int | None)

  • \n
  • bytes (int | None)

  • \n
  • fileCount (int | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\ncolumnMetrics: dict[str, ColumnMetrics]
\n

The property key is the column name

\n
\n
\n
\nrowCount: int | None
\n

The number of rows evaluated

\n
\n
\n
\nbytes: int | None
\n

The size in bytes

\n
\n
\n
\nfileCount: int | None
\n

The number of files evaluated

\n
\n
\n
\n
\n

openlineage.client.generated.dataset_version_dataset module

\n
\n
\nclass openlineage.client.generated.dataset_version_dataset.DatasetVersionDatasetFacet(datasetVersion, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • datasetVersion (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndatasetVersion: str
\n

The version of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.datasource_dataset module

\n
\n
\nclass openlineage.client.generated.datasource_dataset.DatasourceDatasetFacet(name=None, uri=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • name (str | None)

  • \n
  • uri (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nname: str | None
\n
\n
\n
\nuri: str | None
\n
\n
\n
\nuri_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.documentation_dataset module

\n
\n
\nclass openlineage.client.generated.documentation_dataset.DocumentationDatasetFacet(description, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • description (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndescription: str
\n

The description of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.documentation_job module

\n
\n
\nclass openlineage.client.generated.documentation_job.DocumentationJobFacet(description, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • description (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndescription: str
\n

The description of the job.

\n
\n
\n
\n
\n

openlineage.client.generated.error_message_run module

\n
\n
\nclass openlineage.client.generated.error_message_run.ErrorMessageRunFacet(message, programmingLanguage, stackTrace=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • message (str)

  • \n
  • programmingLanguage (str)

  • \n
  • stackTrace (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nmessage: str
\n

A human-readable string representing error message generated by observed system

\n
\n
\n
\nprogrammingLanguage: str
\n

Programming language the observed system uses.

\n
\n
\n
\nstackTrace: str | None
\n

A language-specific stack trace generated by observed system

\n
\n
\n
\n
\n

openlineage.client.generated.external_query_run module

\n
\n
\nclass openlineage.client.generated.external_query_run.ExternalQueryRunFacet(externalQueryId, source, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • externalQueryId (str)

  • \n
  • source (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nexternalQueryId: str
\n

Identifier for the external system

\n
\n
\n
\nsource: str
\n

source of the external query

\n
\n
\n
\n
\n

openlineage.client.generated.extraction_error_run module

\n
\n
\nclass openlineage.client.generated.extraction_error_run.Error(errorMessage, stackTrace=None, task=None, taskNumber=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • errorMessage (str)

  • \n
  • stackTrace (str | None)

  • \n
  • task (str | None)

  • \n
  • taskNumber (int | None)

  • \n
\n
\n
\n
\n
\nerrorMessage: str
\n

Text representation of extraction error message.

\n
\n
\n
\nstackTrace: str | None
\n

Stack trace of extraction error message

\n
\n
\n
\ntask: str | None
\n

Text representation of task that failed. This can be, for example, SQL statement that parser could\nnot interpret.

\n
\n
\n
\ntaskNumber: int | None
\n

Order of task (counted from 0).

\n
\n
\n
\n
\nclass openlineage.client.generated.extraction_error_run.ExtractionErrorRunFacet(totalTasks, failedTasks, errors, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • totalTasks (int)

  • \n
  • failedTasks (int)

  • \n
  • errors (list[Error])

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\ntotalTasks: int
\n

The number of distinguishable tasks in a run that were processed by OpenLineage, whether\nsuccessfully or not. Those could be, for example, distinct SQL statements.

\n
\n
\n
\nfailedTasks: int
\n

The number of distinguishable tasks in a run that were processed not successfully by OpenLineage.\nThose could be, for example, distinct SQL statements.

\n
\n
\n
\nerrors: list[Error]
\n
\n
\n
\n
\n

openlineage.client.generated.job_type_job module

\n
\n
\nclass openlineage.client.generated.job_type_job.JobTypeJobFacet(processingType, integration, jobType=None, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • processingType (str)

  • \n
  • integration (str)

  • \n
  • jobType (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nprocessingType: str
\n

BATCH or STREAMING

\n
\n
Type:
\n

Job processing type like

\n
\n
\n
\n
\n
\nintegration: str
\n

for example SPARK|DBT|AIRFLOW|FLINK

\n
\n
Type:
\n

OpenLineage integration type of this job

\n
\n
\n
\n
\n
\njobType: str | None
\n

QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.

\n
\n
Type:
\n

Run type, for example

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.lifecycle_state_change_dataset module

\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.LifecycleStateChange(value)
\n

Bases: Enum

\n

The lifecycle state change.

\n
\n
\nALTER = 'ALTER'
\n
\n
\n
\nCREATE = 'CREATE'
\n
\n
\n
\nDROP = 'DROP'
\n
\n
\n
\nOVERWRITE = 'OVERWRITE'
\n
\n
\n
\nRENAME = 'RENAME'
\n
\n
\n
\nTRUNCATE = 'TRUNCATE'
\n
\n
\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.LifecycleStateChangeDatasetFacet(lifecycleStateChange, previousIdentifier=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • lifecycleStateChange (LifecycleStateChange)

  • \n
  • previousIdentifier (PreviousIdentifier | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nlifecycleStateChange: LifecycleStateChange
\n

The lifecycle state change.

\n
\n
\n
\npreviousIdentifier: PreviousIdentifier | None
\n

Previous name of the dataset in case of renaming it.

\n
\n
\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.PreviousIdentifier(name, namespace)
\n

Bases: RedactMixin

\n

Previous name of the dataset in case of renaming it.

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • namespace (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nnamespace: str
\n
\n
\n
\n
\n

openlineage.client.generated.nominal_time_run module

\n
\n
\nclass openlineage.client.generated.nominal_time_run.NominalTimeRunFacet(nominalStartTime, nominalEndTime=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • nominalStartTime (str)

  • \n
  • nominalEndTime (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nnominalStartTime: str
\n

//en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time\n(included) of the run. AKA the schedule time

\n
\n
Type:
\n

An [ISO-8601](https

\n
\n
\n
\n
\n
\nnominalEndTime: str | None
\n

//en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time\n(excluded) of the run. (Should be the nominal start time of the next run)

\n
\n
Type:
\n

An [ISO-8601](https

\n
\n
\n
\n
\n
\nnominalstarttime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nnominalendtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.output_statistics_output_dataset module

\n
\n
\nclass openlineage.client.generated.output_statistics_output_dataset.OutputStatisticsOutputDatasetFacet(rowCount=None, size=None, fileCount=None, *, producer='')
\n

Bases: OutputDatasetFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (int | None)

  • \n
  • size (int | None)

  • \n
  • fileCount (int | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nrowCount: int | None
\n

The number of rows written to the dataset

\n
\n
\n
\nsize: int | None
\n

The size in bytes written to the dataset

\n
\n
\n
\nfileCount: int | None
\n

The number of files written to the dataset

\n
\n
\n
\n
\n

openlineage.client.generated.ownership_dataset module

\n
\n
\nclass openlineage.client.generated.ownership_dataset.Owner(name, type=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example\napplication:foo, user:jdoe, team:data

\n
\n
\n
\ntype: str | None
\n

The type of ownership (optional)

\n
\n
\n
\n
\nclass openlineage.client.generated.ownership_dataset.OwnershipDatasetFacet(owners=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • owners (list[Owner] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nowners: list[Owner] | None
\n

The owners of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.ownership_job module

\n
\n
\nclass openlineage.client.generated.ownership_job.Owner(name, type=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

the identifier of the owner of the Job. It is recommended to define this as a URN. For example\napplication:foo, user:jdoe, team:data

\n
\n
\n
\ntype: str | None
\n

The type of ownership (optional)

\n
\n
\n
\n
\nclass openlineage.client.generated.ownership_job.OwnershipJobFacet(owners=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • owners (list[Owner] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nowners: list[Owner] | None
\n

The owners of the job.

\n
\n
\n
\n
\n

openlineage.client.generated.parent_run module

\n
\n
\nclass openlineage.client.generated.parent_run.Job(namespace, name)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\n
\nclass openlineage.client.generated.parent_run.ParentRunFacet(run, job, *, producer='')
\n

Bases: RunFacet

\n

the id of the parent run and job, iff this run was spawn from an other run (for example, the Dag run\nscheduling its tasks)

\n
\n
Parameters:
\n
    \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\nclassmethod create(runId, namespace, name)
\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
Return type:
\n

ParentRunFacet

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.parent_run.Run(runId)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n

runId (str)

\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.processing_engine_run module

\n
\n
\nclass openlineage.client.generated.processing_engine_run.ProcessingEngineRunFacet(version, name=None, openlineageAdapterVersion=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • version (str)

  • \n
  • name (str | None)

  • \n
  • openlineageAdapterVersion (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nversion: str
\n

Processing engine version. Might be Airflow or Spark version.

\n
\n
\n
\nname: str | None
\n

Processing engine name, e.g. Airflow or Spark

\n
\n
\n
\nopenlineageAdapterVersion: str | None
\n

OpenLineage adapter package version. Might be e.g. OpenLineage Airflow integration package version

\n
\n
\n
\n
\n

openlineage.client.generated.schema_dataset module

\n
\n
\nclass openlineage.client.generated.schema_dataset.SchemaDatasetFacet(fields=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • fields (list[SchemaDatasetFacetFields] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nfields: list[SchemaDatasetFacetFields] | None
\n

The fields of the data source.

\n
\n
\n
\n
\nclass openlineage.client.generated.schema_dataset.SchemaDatasetFacetFields(name, type=None, description=None, fields=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
  • description (str | None)

  • \n
  • fields (list[SchemaDatasetFacetFields] | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

The name of the field.

\n
\n
\n
\ntype: str | None
\n

The type of the field.

\n
\n
\n
\ndescription: str | None
\n

The description of the field.

\n
\n
\n
\nfields: list[SchemaDatasetFacetFields] | None
\n

Nested struct fields.

\n
\n
\n
\n
\n

openlineage.client.generated.source_code_job module

\n
\n
\nclass openlineage.client.generated.source_code_job.SourceCodeJobFacet(language, sourceCode, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • language (str)

  • \n
  • sourceCode (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nlanguage: str
\n

Language in which source code of this job was written.

\n
\n
\n
\nsourceCode: str
\n

Source code of this job.

\n
\n
\n
\n
\n

openlineage.client.generated.source_code_location_job module

\n
\n
\nclass openlineage.client.generated.source_code_location_job.SourceCodeLocationJobFacet(type, url, repoUrl=None, path=None, version=None, tag=None, branch=None, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • url (str)

  • \n
  • repoUrl (str | None)

  • \n
  • path (str | None)

  • \n
  • version (str | None)

  • \n
  • tag (str | None)

  • \n
  • branch (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ntype: str
\n

the source control system

\n
\n
\n
\nurl: str
\n

the full http URL to locate the file

\n
\n
\n
\nrepoUrl: str | None
\n

the URL to the repository

\n
\n
\n
\npath: str | None
\n

the path in the repo containing the source files

\n
\n
\n
\nversion: str | None
\n

the current version deployed (not a branch name, the actual unique version)

\n
\n
\n
\ntag: str | None
\n

optional tag name

\n
\n
\n
\nbranch: str | None
\n

optional branch name

\n
\n
\n
\nurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.sql_job module

\n
\n
\nclass openlineage.client.generated.sql_job.SQLJobFacet(query, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • query (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nquery: str
\n
\n
\n
\n
\n

openlineage.client.generated.storage_dataset module

\n
\n
\nclass openlineage.client.generated.storage_dataset.StorageDatasetFacet(storageLayer, fileFormat=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • storageLayer (str)

  • \n
  • fileFormat (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nstorageLayer: str
\n

iceberg, delta.

\n
\n
Type:
\n

Storage layer provider with allowed values

\n
\n
\n
\n
\n
\nfileFormat: str | None
\n

parquet, orc, avro, json, csv, text, xml.

\n
\n
Type:
\n

File format with allowed values

\n
\n
\n
\n
\n
\n\n
\n

openlineage.client.transport.composite module

\n
\n
\nclass openlineage.client.transport.composite.CompositeConfig(transports, continue_on_failure=True)
\n

Bases: Config

\n

CompositeConfig is a configuration class for CompositeTransport.

\n
\n
Parameters:
\n
    \n
  • transports (list[dict[str, Any]] | dict[str, dict[str, Any]])

  • \n
  • continue_on_failure (bool)

  • \n
\n
\n
\n
\n
\ntransports
\n

A list of dictionaries, where each dictionary represents the configuration\nfor a child transport. Each dictionary should contain the necessary parameters\nto initialize a specific transport instance.

\n
\n
\n
\ncontinue_on_failure
\n

If set to True, the CompositeTransport will attempt to emit the event using\nall configured transports, regardless of whether any previous transport\nin the list failed to emit the event. If set to False, an error in one\ntransport will halt the emission process for subsequent transports.

\n
\n
\n
\ntransports: list[dict[str, Any]] | dict[str, dict[str, Any]]
\n
\n
\n
\ncontinue_on_failure: bool
\n
\n
\n
\nclassmethod from_dict(params)
\n

Create a CompositeConfig object from a dictionary.

\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

CompositeConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.composite.CompositeTransport(config)
\n

Bases: Transport

\n

CompositeTransport is a transport class that emits events using multiple transports.

\n
\n
Parameters:
\n

config (CompositeConfig)

\n
\n
\n
\n
\nkind: str | None = 'composite'
\n
\n
\n
\nconfig_class
\n

alias of CompositeConfig

\n
\n
\n
\nproperty transports: list[Transport]
\n

Create and return a list of transports based on the config.

\n
\n
\n
\nemit(event)
\n

Emit an event using all transports in the config.

\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.console module

\n
\n
\nclass openlineage.client.transport.console.ConsoleConfig
\n

Bases: Config

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.console.ConsoleTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (ConsoleConfig)

\n
\n
\n
\n
\nkind: str | None = 'console'
\n
\n
\n
\nconfig_class
\n

alias of ConsoleConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.factory module

\n
\n
\nclass openlineage.client.transport.factory.DefaultTransportFactory
\n

Bases: TransportFactory

\n
\n
\n
\n
\nregister_transport(of_type, clazz)
\n
\n
Parameters:
\n
    \n
  • of_type (str)

  • \n
  • clazz (type[Transport] | str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\ncreate(config=None)
\n

Initializes and returns a transport mechanism based on the provided configuration.

\n

If \u2018OPENLINEAGE_DISABLED\u2019 is set to \u2018true\u2019, a NoopTransport instance is returned,\neffectively disabling transport.\nIf a configuration dictionary is provided, transport specified by the config is initialized.\nIf no configuration is provided, the function defaults to a console-based transport, logging\na warning and printing events to the console.

\n
\n
Parameters:
\n

config (dict[str, str] | None)

\n
\n
Return type:
\n

Transport

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.file module

\n
\n
\nclass openlineage.client.transport.file.FileConfig(log_file_path, append=False)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • log_file_path (str)

  • \n
  • append (bool)

  • \n
\n
\n
\n
\n
\nlog_file_path: str
\n
\n
\n
\nappend: bool = False
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

FileConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.file.FileTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (FileConfig)

\n
\n
\n
\n
\nkind: str | None = 'file'
\n
\n
\n
\nconfig_class
\n

alias of FileConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.http module

\n
\n
\nclass openlineage.client.transport.http.TokenProvider(config)
\n

Bases: object

\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
\n
\n
\nget_bearer()
\n
\n
Return type:
\n

str | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpCompression(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nGZIP = 'gzip'
\n
\n
\n
\n
\nclass openlineage.client.transport.http.ApiKeyTokenProvider(config)
\n

Bases: TokenProvider

\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
\n
\n
\nget_bearer()
\n
\n
Return type:
\n

str | None

\n
\n
\n
\n
\n
\n
\nopenlineage.client.transport.http.create_token_provider(auth)
\n
\n
Parameters:
\n

auth (dict[str, str])

\n
\n
Return type:
\n

TokenProvider

\n
\n
\n
\n
\n
\nopenlineage.client.transport.http.get_session()
\n
\n
Return type:
\n

Session

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpConfig(url, endpoint='api/v1/lineage', timeout=5.0, verify=True, auth=_Nothing.NOTHING, compression=None, session=None, adapter=None, custom_headers=_Nothing.NOTHING)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • url (str)

  • \n
  • endpoint (str)

  • \n
  • timeout (float)

  • \n
  • verify (bool)

  • \n
  • auth (TokenProvider)

  • \n
  • compression (HttpCompression | None)

  • \n
  • session (Session | None)

  • \n
  • adapter (HTTPAdapter | None)

  • \n
  • custom_headers (dict[str, str])

  • \n
\n
\n
\n
\n
\nurl: str
\n
\n
\n
\nendpoint: str
\n
\n
\n
\ntimeout: float
\n
\n
\n
\nverify: bool
\n
\n
\n
\nauth: TokenProvider
\n
\n
\n
\ncompression: HttpCompression | None
\n
\n
\n
\nsession: Session | None
\n
\n
\n
\nadapter: HTTPAdapter | None
\n
\n
\n
\ncustom_headers: dict[str, str]
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

HttpConfig

\n
\n
\n
\n
\n
\nclassmethod from_options(url, options, session)
\n
\n
Parameters:
\n
    \n
  • url (str)

  • \n
  • options (OpenLineageClientOptions)

  • \n
  • session (Session | None)

  • \n
\n
\n
Return type:
\n

HttpConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (HttpConfig)

\n
\n
\n
\n
\nkind: str | None = 'http'
\n
\n
\n
\nconfig_class
\n

alias of HttpConfig

\n
\n
\n
\nset_adapter(adapter)
\n
\n
Parameters:
\n

adapter (HTTPAdapter)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

Response

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.kafka module

\n
\n
\nclass openlineage.client.transport.kafka.KafkaConfig(config, topic, messageKey=None, flush=True)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • config (dict[str, str])

  • \n
  • topic (str)

  • \n
  • messageKey (str | None)

  • \n
  • flush (bool)

  • \n
\n
\n
\n
\n
\nconfig: dict[str, str]
\n
\n
\n
\ntopic: str
\n
\n
\n
\nmessageKey: str | None
\n
\n
\n
\nflush: bool
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\n
\nopenlineage.client.transport.kafka.on_delivery(err, msg)
\n
\n
Parameters:
\n
    \n
  • err (KafkaError)

  • \n
  • msg (Message)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.kafka.KafkaTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (KafkaConfig)

\n
\n
\n
\n
\nkind: str | None = 'kafka'
\n
\n
\n
\nconfig_class
\n

alias of KafkaConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.msk_iam module

\n
\n
\nclass openlineage.client.transport.msk_iam.MSKIAMConfig(config, topic, messageKey=None, flush=True, region=None, aws_profile=None, role_arn=None, aws_debug_creds=False)
\n

Bases: KafkaConfig

\n
\n
Parameters:
\n
    \n
  • config (dict[str, str])

  • \n
  • topic (str)

  • \n
  • messageKey (str | None)

  • \n
  • flush (bool)

  • \n
  • region (str)

  • \n
  • aws_profile (None | str)

  • \n
  • role_arn (None | str)

  • \n
  • aws_debug_creds (bool)

  • \n
\n
\n
\n
\n
\nregion: str
\n
\n
\n
\naws_profile: None | str
\n
\n
\n
\nrole_arn: None | str
\n
\n
\n
\naws_debug_creds: bool
\n
\n
\n
\n
\nclass openlineage.client.transport.msk_iam.MSKIAMTransport(config)
\n

Bases: KafkaTransport

\n
\n
Parameters:
\n

config (MSKIAMConfig)

\n
\n
\n
\n
\nkind: str | None = 'msk-iam'
\n
\n
\n
\nconfig_class
\n

alias of MSKIAMConfig

\n
\n
\n
\n
\n

openlineage.client.transport.noop module

\n
\n
\nclass openlineage.client.transport.noop.NoopConfig
\n

Bases: Config

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.noop.NoopTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (NoopConfig)

\n
\n
\n
\n
\nkind: str | None = 'noop'
\n
\n
\n
\nconfig_class
\n

alias of NoopConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.transport module

\n

To implement custom Transport implement Config and Transport classes.

\n
\n
Transport needs to
    \n
  • specify class variable config that will point to Config class that Transport requires

  • \n
  • __init__ that will accept specified Config class instance

  • \n
  • implement emit method that will accept RunEvent

  • \n
\n
\n
\n

Config file is read and parameters there are passed to from_dict classmethod.\nThe config class can have more complex attributes, but needs to be able to\ninstantiate them in from_dict method.

\n

DefaultTransportFactory instantiates custom transports by looking at type field in\nclass config.

\n
\n
\nclass openlineage.client.transport.transport.Config
\n

Bases: object

\n
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.transport.Transport
\n

Bases: object

\n
\n
\nkind: str | None = None
\n
\n
\n
\nname: str | None = None
\n
\n
\n
\nconfig_class
\n

alias of Config

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

Any

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.transport.TransportFactory
\n

Bases: object

\n
\n
\ncreate(config=None)
\n
\n
Parameters:
\n

config (dict[str, str] | None)

\n
\n
Return type:
\n

Transport

\n
\n
\n
\n
\n
\n
\n
"}}> +
\n
\n
\n On this page\n
\n
\n\n
\n
\n\n
\n

openlineage.client.client module

\n
\n
\nclass openlineage.client.client.OpenLineageClientOptions(timeout=5.0, verify=True, api_key=None, adapter=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • timeout (float)

  • \n
  • verify (bool)

  • \n
  • api_key (Optional[str])

  • \n
  • adapter (Optional[HTTPAdapter])

  • \n
\n
\n
\n
\n
\ntimeout: float
\n
\n
\n
\nverify: bool
\n
\n
\n
\napi_key: str
\n
\n
\n
\nadapter: HTTPAdapter
\n
\n
\n
\n
\nclass openlineage.client.client.OpenLineageConfig(transport=_Nothing.NOTHING, facets=_Nothing.NOTHING, filters=_Nothing.NOTHING)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • transport (dict[str, Any] | None)

  • \n
  • facets (FacetsConfig)

  • \n
  • filters (list[FilterConfig])

  • \n
\n
\n
\n
\n
\ntransport: dict[str, Any] | None
\n
\n
\n
\nfacets: FacetsConfig
\n
\n
\n
\nfilters: list[FilterConfig]
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

OpenLineageConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.client.OpenLineageClient(url=None, options=None, session=None, transport=None, factory=None, *, config=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • url (str | None)

  • \n
  • options (OpenLineageClientOptions | None)

  • \n
  • session (Session | None)

  • \n
  • transport (Transport | None)

  • \n
  • factory (TransportFactory | None)

  • \n
  • config (dict[str, Any] | None)

  • \n
\n
\n
\n
\n
\nDYNAMIC_ENV_VARS_PREFIX = 'OPENLINEAGE__'
\n
\n
\n
\nDEFAULT_URL_TRANSPORT_NAME = 'default_http'
\n
\n
\n
\nclassmethod from_environment()
\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\nclassmethod from_dict(config)
\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\nfilter_event(event)
\n

Filters jobs according to config-defined events

\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

Event | None

\n
\n
\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproperty config: OpenLineageConfig
\n

Retrieves the OpenLineage configuration.

\n

This property method returns the content of the OpenLineage YAML config file.\nThe configuration is determined by merging sources in the following order of precedence:\n1. User-defined configuration passed to the client constructor.\n2. YAML config file located in one of the following paths:\n- Path specified by the OPENLINEAGE_CONFIG environment variable.\n- Current working directory.\n- $HOME/.openlineage.\n3. Environment variables with the OPENLINEAGE__ prefix.\nIf the configuration is not already loaded, it will be constructed by merging the above sources.\nIn case of a TypeError during the parsing of the configuration, a ValueError will be raised indicating\nthat the structure of the config does not match the expected format.

\n
\n
\n
\nadd_environment_facets(event)
\n

Adds environment variables as facets to the event object.

\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent]

\n
\n
\n
\n
\n
\n
\n

openlineage.client.event_v2 module

\n
\n
\nclass openlineage.client.event_v2.BaseEvent(*, eventTime, producer='')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\neventTime: str
\n

the time the event occurred at

\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\neventtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproducer_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nschemaurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.event_v2.RunEvent(*, eventTime, producer='', run, job, eventType=None, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • eventType (EventType | None)

  • \n
  • inputs (list[InputDataset] | None)

  • \n
  • outputs (list[OutputDataset] | None)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\neventType: EventType | None
\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.JobEvent(*, eventTime, producer='', job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
\n
\n
\n
\n
\njob: Job
\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.DatasetEvent(*, eventTime, producer='', dataset)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • dataset (StaticDataset)

  • \n
\n
\n
\n
\n
\ndataset: StaticDataset
\n
\n
\n
\n
\nopenlineage.client.event_v2.RunState
\n

alias of EventType

\n
\n
\n
\nclass openlineage.client.event_v2.Dataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that dataset

\n
\n
\n
\nname: str
\n

The unique name for that dataset within that namespace

\n
\n
\n
\nfacets: dict[str, DatasetFacet] | None
\n

The facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.event_v2.InputDataset(namespace, name, inputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An input dataset

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ninputFacets: dict[str, InputDatasetFacet] | None
\n

The input facets for this dataset.

\n
\n
\n
\n
\nclass openlineage.client.event_v2.OutputDataset(namespace, name, outputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An output dataset

\n
\n
Parameters:
\n
\n
\n
\n
\n
\noutputFacets: dict[str, OutputDatasetFacet] | None
\n

The output facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.event_v2.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (dict[str, RunFacet] | None)

  • \n
\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nfacets: dict[str, RunFacet] | None
\n

The run facets.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.event_v2.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, JobFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\nfacets: dict[str, JobFacet] | None
\n

The job facets.

\n
\n
\n
\n
\nopenlineage.client.event_v2.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n

openlineage.client.facet module

\n
\n
\nopenlineage.client.facet.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.facet.BaseFacet
\n

Bases: RedactMixin

\n
\n
\n
\n
\nproperty skip_redact: List[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.NominalTimeRunFacet(nominalStartTime, nominalEndTime=None)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • nominalStartTime (str)

  • \n
  • nominalEndTime (Optional[str])

  • \n
\n
\n
\n
\n
\nnominalStartTime: str
\n
\n
\n
\nnominalEndTime: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.ParentRunFacet(run, job)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • run (Dict[Any, Any])

  • \n
  • job (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nrun: Dict[Any, Any]
\n
\n
\n
\njob: Dict[Any, Any]
\n
\n
\n
\nclassmethod create(runId, namespace, name)
\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
Return type:
\n

ParentRunFacet

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet.DocumentationJobFacet(description)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

description (str)

\n
\n
\n
\n
\ndescription: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SourceCodeLocationJobFacet(type, url)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • url (str)

  • \n
\n
\n
\n
\n
\ntype: str
\n
\n
\n
\nurl: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SqlJobFacet(query)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

query (str)

\n
\n
\n
\n
\nquery: str
\n
\n
\n
\n
\nclass openlineage.client.facet.DocumentationDatasetFacet(description)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

description (str)

\n
\n
\n
\n
\ndescription: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SchemaField(name, type, description=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str)

  • \n
  • description (Optional[str])

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\ndescription: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.SchemaDatasetFacet(fields)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n

fields (List[SchemaField])

\n
\n
\n
\n
\nfields: List[SchemaField]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataSourceDatasetFacet(name, uri)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • uri (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nuri: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OutputStatisticsOutputDatasetFacet(rowCount=None, size=None, fileCount=None)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (Optional[int])

  • \n
  • size (Optional[int])

  • \n
  • fileCount (Optional[int])

  • \n
\n
\n
\n
\n
\nrowCount: Optional[int]
\n
\n
\n
\nsize: Optional[int]
\n
\n
\n
\nfileCount: Optional[int]
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnMetric(nullCount=None, distinctCount=None, sum=None, count=None, min=None, max=None, quantiles=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • nullCount (Optional[int])

  • \n
  • distinctCount (Optional[int])

  • \n
  • sum (Optional[int])

  • \n
  • count (Optional[int])

  • \n
  • min (Optional[float])

  • \n
  • max (Optional[float])

  • \n
  • quantiles (Optional[Dict[str, float]])

  • \n
\n
\n
\n
\n
\nnullCount: Optional[int]
\n
\n
\n
\ndistinctCount: Optional[int]
\n
\n
\n
\nsum: Optional[int]
\n
\n
\n
\ncount: Optional[int]
\n
\n
\n
\nmin: Optional[float]
\n
\n
\n
\nmax: Optional[float]
\n
\n
\n
\nquantiles: Optional[Dict[str, float]]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataQualityMetricsInputDatasetFacet(rowCount=None, bytes=None, fileCount=None, columnMetrics=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (Optional[int])

  • \n
  • bytes (Optional[int])

  • \n
  • fileCount (Optional[int])

  • \n
  • columnMetrics (Dict[str, ColumnMetric])

  • \n
\n
\n
\n
\n
\nrowCount: Optional[int]
\n
\n
\n
\nbytes: Optional[int]
\n
\n
\n
\nfileCount: Optional[int]
\n
\n
\n
\ncolumnMetrics: Dict[str, ColumnMetric]
\n
\n
\n
\n
\nclass openlineage.client.facet.Assertion(assertion, success, column=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • assertion (str)

  • \n
  • success (bool)

  • \n
  • column (Optional[str])

  • \n
\n
\n
\n
\n
\nassertion: str
\n
\n
\n
\nsuccess: bool
\n
\n
\n
\ncolumn: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.DataQualityAssertionsDatasetFacet(assertions)
\n

Bases: BaseFacet

\n

This facet represents asserted expectations on dataset or it\u2019s column.

\n
\n
Parameters:
\n

assertions (List[Assertion])

\n
\n
\n
\n
\nassertions: List[Assertion]
\n
\n
\n
\n
\nclass openlineage.client.facet.SourceCodeJobFacet(language, source)
\n

Bases: BaseFacet

\n

This facet represents source code that the job executed.

\n
\n
Parameters:
\n
    \n
  • language (str)

  • \n
  • source (str)

  • \n
\n
\n
\n
\n
\nlanguage: str
\n
\n
\n
\nsource: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ExternalQueryRunFacet(externalQueryId, source)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • externalQueryId (str)

  • \n
  • source (str)

  • \n
\n
\n
\n
\n
\nexternalQueryId: str
\n
\n
\n
\nsource: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ErrorMessageRunFacet(message, programmingLanguage, stackTrace=None)
\n

Bases: BaseFacet

\n

This facet represents an error message that was the result of a job run.

\n
\n
Parameters:
\n
    \n
  • message (str)

  • \n
  • programmingLanguage (str)

  • \n
  • stackTrace (Optional[str])

  • \n
\n
\n
\n
\n
\nmessage: str
\n
\n
\n
\nprogrammingLanguage: str
\n
\n
\n
\nstackTrace: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.SymlinksDatasetFacetIdentifiers(namespace, name, type)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • type (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\n
\nclass openlineage.client.facet.SymlinksDatasetFacet(identifiers=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents dataset symlink names.

\n
\n
Parameters:
\n

identifiers (List[SymlinksDatasetFacetIdentifiers])

\n
\n
\n
\n
\nidentifiers: List[SymlinksDatasetFacetIdentifiers]
\n
\n
\n
\n
\nclass openlineage.client.facet.StorageDatasetFacet(storageLayer, fileFormat)
\n

Bases: BaseFacet

\n

This facet represents dataset symlink names.

\n
\n
Parameters:
\n
    \n
  • storageLayer (str)

  • \n
  • fileFormat (str)

  • \n
\n
\n
\n
\n
\nstorageLayer: str
\n
\n
\n
\nfileFormat: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipJobFacetOwners(name, type=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (Optional[str])

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: Optional[str]
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipJobFacet(owners=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents ownership of a job.

\n
\n
Parameters:
\n

owners (List[OwnershipJobFacetOwners])

\n
\n
\n
\n
\nowners: List[OwnershipJobFacetOwners]
\n
\n
\n
\n
\nclass openlineage.client.facet.JobTypeJobFacet(processingType, integration, jobType)
\n

Bases: BaseFacet

\n

This facet represents job type properties.

\n
\n
Parameters:
\n
    \n
  • processingType (str)

  • \n
  • integration (str)

  • \n
  • jobType (str)

  • \n
\n
\n
\n
\n
\nprocessingType: str
\n
\n
\n
\nintegration: str
\n
\n
\n
\njobType: str
\n
\n
\n
\n
\nclass openlineage.client.facet.DatasetVersionDatasetFacet(datasetVersion)
\n

Bases: BaseFacet

\n

This facet represents version of a dataset.

\n
\n
Parameters:
\n

datasetVersion (str)

\n
\n
\n
\n
\ndatasetVersion: str
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChange(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nALTER = 'ALTER'
\n
\n
\n
\nCREATE = 'CREATE'
\n
\n
\n
\nDROP = 'DROP'
\n
\n
\n
\nOVERWRITE = 'OVERWRITE'
\n
\n
\n
\nRENAME = 'RENAME'
\n
\n
\n
\nTRUNCATE = 'TRUNCATE'
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChangeDatasetFacetPreviousIdentifier(name, namespace)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • namespace (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nnamespace: str
\n
\n
\n
\n
\nclass openlineage.client.facet.LifecycleStateChangeDatasetFacet(lifecycleStateChange, previousIdentifier)
\n

Bases: BaseFacet

\n

This facet represents information of lifecycle changes of a dataset.

\n
\n
Parameters:
\n
\n
\n
\n
\n
\nlifecycleStateChange: LifecycleStateChange
\n
\n
\n
\npreviousIdentifier: LifecycleStateChangeDatasetFacetPreviousIdentifier
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipDatasetFacetOwners(name, type)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\ntype: str
\n
\n
\n
\n
\nclass openlineage.client.facet.OwnershipDatasetFacet(owners=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet represents ownership of a dataset.

\n
\n
Parameters:
\n

owners (List[OwnershipDatasetFacetOwners])

\n
\n
\n
\n
\nowners: List[OwnershipDatasetFacetOwners]
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacetFieldsAdditionalInputFields(namespace, name, field)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • field (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfield: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacetFieldsAdditional(inputFields, transformationDescription, transformationType)
\n

Bases: object

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ninputFields: ClassVar[List[ColumnLineageDatasetFacetFieldsAdditionalInputFields]]
\n
\n
\n
\ntransformationDescription: str
\n
\n
\n
\ntransformationType: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ColumnLineageDatasetFacet(fields=_Nothing.NOTHING)
\n

Bases: BaseFacet

\n

This facet contains column lineage of a dataset.

\n
\n
Parameters:
\n

fields (Dict[str, ColumnLineageDatasetFacetFieldsAdditional])

\n
\n
\n
\n
\nfields: Dict[str, ColumnLineageDatasetFacetFieldsAdditional]
\n
\n
\n
\n
\nclass openlineage.client.facet.ProcessingEngineRunFacet(version, name, openlineageAdapterVersion)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • version (str)

  • \n
  • name (str)

  • \n
  • openlineageAdapterVersion (str)

  • \n
\n
\n
\n
\n
\nversion: str
\n
\n
\n
\nname: str
\n
\n
\n
\nopenlineageAdapterVersion: str
\n
\n
\n
\n
\nclass openlineage.client.facet.ExtractionError(errorMessage, stackTrace, task, taskNumber)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
    \n
  • errorMessage (str)

  • \n
  • stackTrace (Optional[str])

  • \n
  • task (Optional[str])

  • \n
  • taskNumber (Optional[int])

  • \n
\n
\n
\n
\n
\nerrorMessage: str
\n
\n
\n
\nstackTrace: Optional[str]
\n
\n
\n
\ntask: Optional[str]
\n
\n
\n
\ntaskNumber: Optional[int]
\n
\n
\n
\n
\nclass openlineage.client.facet.ExtractionErrorRunFacet(totalTasks, failedTasks, errors)
\n

Bases: BaseFacet

\n
\n
Parameters:
\n
\n
\n
\n
\n
\ntotalTasks: int
\n
\n
\n
\nfailedTasks: int
\n
\n
\n
\nerrors: List[ExtractionError]
\n
\n
\n
\n
\n

openlineage.client.facet_v2 module

\n
\n
\nclass openlineage.client.facet_v2.BaseFacet(*, producer='')
\n

Bases: RedactMixin

\n

all fields of the base facet are prefixed with _ to avoid name conflicts in facets

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.DatasetFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Dataset Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.InputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Input Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.JobFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Job Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.OutputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Output Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.facet_v2.RunFacet(*, producer='')
\n

Bases: BaseFacet

\n

A Run Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nopenlineage.client.facet_v2.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n

openlineage.client.filter module

\n
\n
\nclass openlineage.client.filter.FilterConfig(type=None, match=None, regex=None)
\n

Bases: object

\n
\n
Parameters:
\n
    \n
  • type (str | None)

  • \n
  • match (str | None)

  • \n
  • regex (str | None)

  • \n
\n
\n
\n
\n
\ntype: str | None
\n
\n
\n
\nmatch: str | None
\n
\n
\n
\nregex: str | None
\n
\n
\n
\n
\nclass openlineage.client.filter.Filter
\n

Bases: object

\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.filter.ExactMatchFilter(match)
\n

Bases: Filter

\n
\n
Parameters:
\n

match (str)

\n
\n
\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.filter.RegexFilter(regex)
\n

Bases: Filter

\n
\n
Parameters:
\n

regex (str)

\n
\n
\n
\n
\nfilter_event(event)
\n
\n
Parameters:
\n

event (RunEventType)

\n
\n
Return type:
\n

RunEventType | None

\n
\n
\n
\n
\n
\n
\nopenlineage.client.filter.create_filter(conf)
\n
\n
Parameters:
\n

conf (FilterConfig)

\n
\n
Return type:
\n

Filter | None

\n
\n
\n
\n
\n
\n

openlineage.client.run module

\n
\n
\nclass openlineage.client.run.RunState(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nSTART = 'START'
\n
\n
\n
\nRUNNING = 'RUNNING'
\n
\n
\n
\nCOMPLETE = 'COMPLETE'
\n
\n
\n
\nABORT = 'ABORT'
\n
\n
\n
\nFAIL = 'FAIL'
\n
\n
\n
\nOTHER = 'OTHER'
\n
\n
\n
\n
\nclass openlineage.client.run.Dataset(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.InputDataset(namespace, name, facets=_Nothing.NOTHING, inputFacets=_Nothing.NOTHING)
\n

Bases: Dataset

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
  • inputFacets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\ninputFacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.OutputDataset(namespace, name, facets=_Nothing.NOTHING, outputFacets=_Nothing.NOTHING)
\n

Bases: Dataset

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
  • outputFacets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\noutputFacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.DatasetEvent(eventTime, producer, schemaURL, dataset)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • schemaURL (str)

  • \n
  • dataset (Dataset)

  • \n
\n
\n
\n
\n
\neventTime: str
\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\ndataset: Dataset
\n
\n
\n
\n
\nclass openlineage.client.run.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nnamespace: str
\n
\n
\n
\nname: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\n
\nclass openlineage.client.run.JobEvent(eventTime, producer, schemaURL, job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • schemaURL (str)

  • \n
  • job (Job)

  • \n
  • inputs (Optional[List[Dataset]])

  • \n
  • outputs (Optional[List[Dataset]])

  • \n
\n
\n
\n
\n
\neventTime: str
\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\njob: Job
\n
\n
\n
\ninputs: Optional[List[Dataset]]
\n
\n
\n
\noutputs: Optional[List[Dataset]]
\n
\n
\n
\n
\nclass openlineage.client.run.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (Dict[Any, Any])

  • \n
\n
\n
\n
\n
\nrunId: str
\n
\n
\n
\nfacets: Dict[Any, Any]
\n
\n
\n
\ncheck(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.run.RunEvent(eventType, eventTime, run, job, producer, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING, schemaURL='https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventType (RunState)

  • \n
  • eventTime (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • producer (str)

  • \n
  • inputs (Optional[List[Dataset]])

  • \n
  • outputs (Optional[List[Dataset]])

  • \n
  • schemaURL (str)

  • \n
\n
\n
\n
\n
\neventType: RunState
\n
\n
\n
\neventTime: str
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\nproducer: str
\n
\n
\n
\ninputs: Optional[List[Dataset]]
\n
\n
\n
\noutputs: Optional[List[Dataset]]
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\ncheck(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.serde module

\n
\n
\nclass openlineage.client.serde.Serde
\n

Bases: object

\n
\n
\nclassmethod remove_nulls_and_enums(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

Any

\n
\n
\n
\n
\n
\nclassmethod to_dict(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

dict[Any, Any]

\n
\n
\n
\n
\n
\nclassmethod to_json(obj)
\n
\n
Parameters:
\n

obj (Any)

\n
\n
Return type:
\n

str

\n
\n
\n
\n
\n
\n
\n

openlineage.client.utils module

\n
\n
\nopenlineage.client.utils.import_from_string(path)
\n
\n
Parameters:
\n

path (str)

\n
\n
Return type:
\n

type[Any]

\n
\n
\n
\n
\n
\nopenlineage.client.utils.try_import_from_string(path)
\n
\n
Parameters:
\n

path (str)

\n
\n
Return type:
\n

type[Any] | None

\n
\n
\n
\n
\n
\nopenlineage.client.utils.get_only_specified_fields(clazz, params)
\n
\n
Parameters:
\n
    \n
  • clazz (type[Any])

  • \n
  • params (dict[str, Any])

  • \n
\n
\n
Return type:
\n

dict[str, Any]

\n
\n
\n
\n
\n
\nopenlineage.client.utils.deep_merge_dicts(dict1, dict2)
\n

Deep merges two dictionaries.

\n

This function merges two dictionaries while handling nested dictionaries.\nFor keys that exist in both dictionaries, the values from dict2 take precedence.\nIf a key exists in both dictionaries and the values are dictionaries themselves,\nthey are merged recursively.\nThis function merges only dictionaries. If key is of different type, e.g. list\nit does not work properly.

\n
\n
Parameters:
\n
    \n
  • dict1 (dict[Any, Any])

  • \n
  • dict2 (dict[Any, Any])

  • \n
\n
\n
Return type:
\n

dict[Any, Any]

\n
\n
\n
\n
\n
\nclass openlineage.client.utils.RedactMixin
\n

Bases: object

\n
\n
\nproperty skip_redact: list[str]
\n
\n
\n
\n
\n

openlineage.client.uuid module

\n
\n
\nopenlineage.client.uuid.generate_new_uuid(instant=None)
\n

Generate new UUID for an instant of time. Each function call returns a new UUID value.

\n

UUID version is an implementation detail, and should not be relied on.\nFor now it is [UUIDv7](https://datatracker.ietf.org/doc/rfc9562/), so for increasing instant values,\nreturned UUID is always greater than previous one.

\n

Using uuid6 lib implementation (MIT License), with few changes:\n* oittaa/uuid6-python\n* oittaa/uuid6-python

\n

Added in v1.15.0

\n
\n
Parameters:
\n

instant (datetime | None) \u2013 instant of time used to generate UUID. If not provided, current time is used.

\n
\n
Return type:
\n

UUID

\n
\n
Returns:
\n

UUID

\n
\n
\n
\n
\n
\nopenlineage.client.uuid.generate_static_uuid(instant, data)
\n

Generate UUID for instant of time and input data.\nCalling function with same arguments always produces the same result.

\n

UUID version is an implementation detail, and **should not* be relied on.\nFor now it is [UUIDv7](https://datatracker.ietf.org/doc/rfc9562/), so for increasing instant values,\nreturned UUID is always greater than previous one. The only difference from RFC 9562 is that\nleast significant bytes are not random, but instead a SHA-1 hash of input data.

\n

Using uuid6 lib implementation (MIT License), with few changes:\n* oittaa/uuid6-python\n* oittaa/uuid6-python

\n

Added in v1.15.0

\n
\n
Parameters:
\n
    \n
  • instant (datetime) \u2013 instant of time used to generate UUID. If not provided, current time is used.

  • \n
  • data (bytes) \u2013 input data to generate random part from.

  • \n
\n
\n
Return type:
\n

UUID

\n
\n
Returns:
\n

UUID

\n
\n
\n
\n
\n
\n

openlineage.client.generated.base module

\n
\n
\nopenlineage.client.generated.base.set_producer(producer)
\n
\n
Parameters:
\n

producer (str)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.BaseEvent(*, eventTime, producer='')
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\neventTime: str
\n

the time the event occurred at

\n
\n
\n
\nproducer: str
\n
\n
\n
\nschemaURL: str
\n
\n
\n
\nproperty skip_redact
\n
\n
\n
\neventtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nproducer_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nschemaurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.BaseFacet(*, producer='')
\n

Bases: RedactMixin

\n

all fields of the base facet are prefixed with _ to avoid name conflicts in facets

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\nproperty skip_redact
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Dataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that dataset

\n
\n
\n
\nname: str
\n

The unique name for that dataset within that namespace

\n
\n
\n
\nfacets: dict[str, DatasetFacet] | None
\n

The facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.generated.base.DatasetEvent(*, eventTime, producer='', dataset)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • dataset (StaticDataset)

  • \n
\n
\n
\n
\n
\ndataset: StaticDataset
\n
\n
\n
\n
\nclass openlineage.client.generated.base.DatasetFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Dataset Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.EventType(value)
\n

Bases: Enum

\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\nSTART = 'START'
\n
\n
\n
\nRUNNING = 'RUNNING'
\n
\n
\n
\nCOMPLETE = 'COMPLETE'
\n
\n
\n
\nABORT = 'ABORT'
\n
\n
\n
\nFAIL = 'FAIL'
\n
\n
\n
\nOTHER = 'OTHER'
\n
\n
\n
\n
\nclass openlineage.client.generated.base.InputDataset(namespace, name, inputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An input dataset

\n
\n
\ninputFacets: dict[str, InputDatasetFacet] | None
\n

The input facets for this dataset.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.InputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Input Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Job(namespace, name, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, JobFacet] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\nfacets: dict[str, JobFacet] | None
\n

The job facets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.JobEvent(*, eventTime, producer='', job, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
\njob: Job
\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.JobFacet(*, producer='', deleted=None)
\n

Bases: BaseFacet

\n

A Job Facet

\n
\n
Parameters:
\n
    \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.OutputDataset(namespace, name, outputFacets=_Nothing.NOTHING, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

An output dataset

\n
\n
\noutputFacets: dict[str, OutputDatasetFacet] | None
\n

The output facets for this dataset

\n
\n
\n
\n
\nclass openlineage.client.generated.base.OutputDatasetFacet(*, producer='')
\n

Bases: BaseFacet

\n

An Output Dataset Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.Run(runId, facets=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • facets (dict[str, RunFacet] | None)

  • \n
\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nfacets: dict[str, RunFacet] | None
\n

The run facets.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.RunEvent(*, eventTime, producer='', run, job, eventType=None, inputs=_Nothing.NOTHING, outputs=_Nothing.NOTHING)
\n

Bases: BaseEvent

\n
\n
Parameters:
\n
    \n
  • eventTime (str)

  • \n
  • producer (str)

  • \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • eventType (EventType | None)

  • \n
  • inputs (list[InputDataset] | None)

  • \n
  • outputs (list[OutputDataset] | None)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\neventType: EventType | None
\n

the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE,\nABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run.\nFor example to send additional metadata after the run is complete

\n
\n
\n
\ninputs: list[InputDataset] | None
\n

The set of input datasets.

\n
\n
\n
\noutputs: list[OutputDataset] | None
\n

The set of output datasets.

\n
\n
\n
\n
\nclass openlineage.client.generated.base.RunFacet(*, producer='')
\n

Bases: BaseFacet

\n

A Run Facet

\n
\n
Parameters:
\n

producer (str)

\n
\n
\n
\n
\n
\nclass openlineage.client.generated.base.StaticDataset(namespace, name, *, facets=_Nothing.NOTHING)
\n

Bases: Dataset

\n

A Dataset sent within static metadata events

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • facets (dict[str, DatasetFacet] | None)

  • \n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.column_lineage_dataset module

\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.ColumnLineageDatasetFacet(fields, dataset=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • fields (dict[str, Fields])

  • \n
  • dataset (list[InputField] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nfields: dict[str, Fields]
\n

Column level lineage that maps output fields into input fields used to evaluate them.

\n
\n
\n
\ndataset: list[InputField] | None
\n

Column level lineage that affects the whole dataset. This includes filtering, sorting, grouping\n(aggregates), joining, window functions, etc.

\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.Fields(inputFields, transformationDescription=None, transformationType=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • inputFields (list[InputField])

  • \n
  • transformationDescription (str | None)

  • \n
  • transformationType (str | None)

  • \n
\n
\n
\n
\n
\ninputFields: list[InputField]
\n
\n
\n
\ntransformationDescription: str | None
\n

a string representation of the transformation applied

\n
\n
\n
\ntransformationType: str | None
\n

no\noriginal data available (like a hash of PII for example)

\n
\n
Type:
\n

IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY

\n
\n
Type:
\n

exact same as input; MASKED

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.InputField(namespace, name, field, transformations=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n

Represents a single dependency on some field (column).

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
  • field (str)

  • \n
  • transformations (list[Transformation] | None)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The input dataset namespace

\n
\n
\n
\nname: str
\n

The input dataset name

\n
\n
\n
\nfield: str
\n

The input field

\n
\n
\n
\ntransformations: list[Transformation] | None
\n
\n
\n
\n
\nclass openlineage.client.generated.column_lineage_dataset.Transformation(type, subtype=None, description=None, masking=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • subtype (str | None)

  • \n
  • description (str | None)

  • \n
  • masking (bool | None)

  • \n
\n
\n
\n
\n
\ntype: str
\n

DIRECT, INDIRECT

\n
\n
Type:
\n

The type of the transformation. Allowed values are

\n
\n
\n
\n
\n
\nsubtype: str | None
\n

The subtype of the transformation

\n
\n
\n
\ndescription: str | None
\n

a string representation of the transformation applied

\n
\n
\n
\nmasking: bool | None
\n

is transformation masking the data or not

\n
\n
\n
\n
\n

openlineage.client.generated.data_quality_assertions_dataset module

\n
\n
\nclass openlineage.client.generated.data_quality_assertions_dataset.Assertion(assertion, success, column=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • assertion (str)

  • \n
  • success (bool)

  • \n
  • column (str | None)

  • \n
\n
\n
\n
\n
\nassertion: str
\n

Type of expectation test that dataset is subjected to

\n
\n
\n
\nsuccess: bool
\n
\n
\n
\ncolumn: str | None
\n

Column that expectation is testing. It should match the name provided in SchemaDatasetFacet. If\ncolumn field is empty, then expectation refers to whole dataset.

\n
\n
\n
\n
\nclass openlineage.client.generated.data_quality_assertions_dataset.DataQualityAssertionsDatasetFacet(assertions, *, producer='')
\n

Bases: InputDatasetFacet

\n

list of tests performed on dataset or dataset columns, and their results

\n
\n
Parameters:
\n
    \n
  • assertions (list[Assertion])

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nassertions: list[Assertion]
\n
\n
\n
\n
\n

openlineage.client.generated.data_quality_metrics_input_dataset module

\n
\n
\nclass openlineage.client.generated.data_quality_metrics_input_dataset.ColumnMetrics(nullCount=None, distinctCount=None, sum=None, count=None, min=None, max=None, quantiles=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • nullCount (int | None)

  • \n
  • distinctCount (int | None)

  • \n
  • sum (float | None)

  • \n
  • count (float | None)

  • \n
  • min (float | None)

  • \n
  • max (float | None)

  • \n
  • quantiles (dict[str, float] | None)

  • \n
\n
\n
\n
\n
\nnullCount: int | None
\n

The number of null values in this column for the rows evaluated

\n
\n
\n
\ndistinctCount: int | None
\n

The number of distinct values in this column for the rows evaluated

\n
\n
\n
\nsum: float | None
\n

The total sum of values in this column for the rows evaluated

\n
\n
\n
\ncount: float | None
\n

The number of values in this column

\n
\n
\n
\nmin: float | None
\n
\n
\n
\nmax: float | None
\n
\n
\n
\nquantiles: dict[str, float] | None
\n

0.1 0.25 0.5 0.75 1

\n
\n
Type:
\n

The property key is the quantile. Examples

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.data_quality_metrics_input_dataset.DataQualityMetricsInputDatasetFacet(columnMetrics, rowCount=None, bytes=None, fileCount=None, *, producer='')
\n

Bases: InputDatasetFacet

\n
\n
Parameters:
\n
    \n
  • columnMetrics (dict[str, ColumnMetrics])

  • \n
  • rowCount (int | None)

  • \n
  • bytes (int | None)

  • \n
  • fileCount (int | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\ncolumnMetrics: dict[str, ColumnMetrics]
\n

The property key is the column name

\n
\n
\n
\nrowCount: int | None
\n

The number of rows evaluated

\n
\n
\n
\nbytes: int | None
\n

The size in bytes

\n
\n
\n
\nfileCount: int | None
\n

The number of files evaluated

\n
\n
\n
\n
\n

openlineage.client.generated.dataset_version_dataset module

\n
\n
\nclass openlineage.client.generated.dataset_version_dataset.DatasetVersionDatasetFacet(datasetVersion, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • datasetVersion (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndatasetVersion: str
\n

The version of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.datasource_dataset module

\n
\n
\nclass openlineage.client.generated.datasource_dataset.DatasourceDatasetFacet(name=None, uri=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • name (str | None)

  • \n
  • uri (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nname: str | None
\n
\n
\n
\nuri: str | None
\n
\n
\n
\nuri_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.documentation_dataset module

\n
\n
\nclass openlineage.client.generated.documentation_dataset.DocumentationDatasetFacet(description, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • description (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndescription: str
\n

The description of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.documentation_job module

\n
\n
\nclass openlineage.client.generated.documentation_job.DocumentationJobFacet(description, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • description (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ndescription: str
\n

The description of the job.

\n
\n
\n
\n
\n

openlineage.client.generated.error_message_run module

\n
\n
\nclass openlineage.client.generated.error_message_run.ErrorMessageRunFacet(message, programmingLanguage, stackTrace=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • message (str)

  • \n
  • programmingLanguage (str)

  • \n
  • stackTrace (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nmessage: str
\n

A human-readable string representing error message generated by observed system

\n
\n
\n
\nprogrammingLanguage: str
\n

Programming language the observed system uses.

\n
\n
\n
\nstackTrace: str | None
\n

A language-specific stack trace generated by observed system

\n
\n
\n
\n
\n

openlineage.client.generated.external_query_run module

\n
\n
\nclass openlineage.client.generated.external_query_run.ExternalQueryRunFacet(externalQueryId, source, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • externalQueryId (str)

  • \n
  • source (str)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nexternalQueryId: str
\n

Identifier for the external system

\n
\n
\n
\nsource: str
\n

source of the external query

\n
\n
\n
\n
\n

openlineage.client.generated.extraction_error_run module

\n
\n
\nclass openlineage.client.generated.extraction_error_run.Error(errorMessage, stackTrace=None, task=None, taskNumber=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • errorMessage (str)

  • \n
  • stackTrace (str | None)

  • \n
  • task (str | None)

  • \n
  • taskNumber (int | None)

  • \n
\n
\n
\n
\n
\nerrorMessage: str
\n

Text representation of extraction error message.

\n
\n
\n
\nstackTrace: str | None
\n

Stack trace of extraction error message

\n
\n
\n
\ntask: str | None
\n

Text representation of task that failed. This can be, for example, SQL statement that parser could\nnot interpret.

\n
\n
\n
\ntaskNumber: int | None
\n

Order of task (counted from 0).

\n
\n
\n
\n
\nclass openlineage.client.generated.extraction_error_run.ExtractionErrorRunFacet(totalTasks, failedTasks, errors, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • totalTasks (int)

  • \n
  • failedTasks (int)

  • \n
  • errors (list[Error])

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\ntotalTasks: int
\n

The number of distinguishable tasks in a run that were processed by OpenLineage, whether\nsuccessfully or not. Those could be, for example, distinct SQL statements.

\n
\n
\n
\nfailedTasks: int
\n

The number of distinguishable tasks in a run that were processed not successfully by OpenLineage.\nThose could be, for example, distinct SQL statements.

\n
\n
\n
\nerrors: list[Error]
\n
\n
\n
\n
\n

openlineage.client.generated.job_type_job module

\n
\n
\nclass openlineage.client.generated.job_type_job.JobTypeJobFacet(processingType, integration, jobType=None, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • processingType (str)

  • \n
  • integration (str)

  • \n
  • jobType (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nprocessingType: str
\n

BATCH or STREAMING

\n
\n
Type:
\n

Job processing type like

\n
\n
\n
\n
\n
\nintegration: str
\n

for example SPARK|DBT|AIRFLOW|FLINK

\n
\n
Type:
\n

OpenLineage integration type of this job

\n
\n
\n
\n
\n
\njobType: str | None
\n

QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.

\n
\n
Type:
\n

Run type, for example

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.lifecycle_state_change_dataset module

\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.LifecycleStateChange(value)
\n

Bases: Enum

\n

The lifecycle state change.

\n
\n
\nALTER = 'ALTER'
\n
\n
\n
\nCREATE = 'CREATE'
\n
\n
\n
\nDROP = 'DROP'
\n
\n
\n
\nOVERWRITE = 'OVERWRITE'
\n
\n
\n
\nRENAME = 'RENAME'
\n
\n
\n
\nTRUNCATE = 'TRUNCATE'
\n
\n
\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.LifecycleStateChangeDatasetFacet(lifecycleStateChange, previousIdentifier=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • lifecycleStateChange (LifecycleStateChange)

  • \n
  • previousIdentifier (PreviousIdentifier | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nlifecycleStateChange: LifecycleStateChange
\n

The lifecycle state change.

\n
\n
\n
\npreviousIdentifier: PreviousIdentifier | None
\n

Previous name of the dataset in case of renaming it.

\n
\n
\n
\n
\nclass openlineage.client.generated.lifecycle_state_change_dataset.PreviousIdentifier(name, namespace)
\n

Bases: RedactMixin

\n

Previous name of the dataset in case of renaming it.

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • namespace (str)

  • \n
\n
\n
\n
\n
\nname: str
\n
\n
\n
\nnamespace: str
\n
\n
\n
\n
\n

openlineage.client.generated.nominal_time_run module

\n
\n
\nclass openlineage.client.generated.nominal_time_run.NominalTimeRunFacet(nominalStartTime, nominalEndTime=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • nominalStartTime (str)

  • \n
  • nominalEndTime (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nnominalStartTime: str
\n

//en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time\n(included) of the run. AKA the schedule time

\n
\n
Type:
\n

An [ISO-8601](https

\n
\n
\n
\n
\n
\nnominalEndTime: str | None
\n

//en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time\n(excluded) of the run. (Should be the nominal start time of the next run)

\n
\n
Type:
\n

An [ISO-8601](https

\n
\n
\n
\n
\n
\nnominalstarttime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nnominalendtime_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.output_statistics_output_dataset module

\n
\n
\nclass openlineage.client.generated.output_statistics_output_dataset.OutputStatisticsOutputDatasetFacet(rowCount=None, size=None, fileCount=None, *, producer='')
\n

Bases: OutputDatasetFacet

\n
\n
Parameters:
\n
    \n
  • rowCount (int | None)

  • \n
  • size (int | None)

  • \n
  • fileCount (int | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nrowCount: int | None
\n

The number of rows written to the dataset

\n
\n
\n
\nsize: int | None
\n

The size in bytes written to the dataset

\n
\n
\n
\nfileCount: int | None
\n

The number of files written to the dataset

\n
\n
\n
\n
\n

openlineage.client.generated.ownership_dataset module

\n
\n
\nclass openlineage.client.generated.ownership_dataset.Owner(name, type=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example\napplication:foo, user:jdoe, team:data

\n
\n
\n
\ntype: str | None
\n

The type of ownership (optional)

\n
\n
\n
\n
\nclass openlineage.client.generated.ownership_dataset.OwnershipDatasetFacet(owners=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • owners (list[Owner] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nowners: list[Owner] | None
\n

The owners of the dataset.

\n
\n
\n
\n
\n

openlineage.client.generated.ownership_job module

\n
\n
\nclass openlineage.client.generated.ownership_job.Owner(name, type=None)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

the identifier of the owner of the Job. It is recommended to define this as a URN. For example\napplication:foo, user:jdoe, team:data

\n
\n
\n
\ntype: str | None
\n

The type of ownership (optional)

\n
\n
\n
\n
\nclass openlineage.client.generated.ownership_job.OwnershipJobFacet(owners=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • owners (list[Owner] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nowners: list[Owner] | None
\n

The owners of the job.

\n
\n
\n
\n
\n

openlineage.client.generated.parent_run module

\n
\n
\nclass openlineage.client.generated.parent_run.Job(namespace, name)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
\n
\n
\nnamespace: str
\n

The namespace containing that job

\n
\n
\n
\nname: str
\n

The unique name for that job within that namespace

\n
\n
\n
\n
\nclass openlineage.client.generated.parent_run.ParentRunFacet(run, job, *, producer='')
\n

Bases: RunFacet

\n

the id of the parent run and job, iff this run was spawn from an other run (for example, the Dag run\nscheduling its tasks)

\n
\n
Parameters:
\n
    \n
  • run (Run)

  • \n
  • job (Job)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nrun: Run
\n
\n
\n
\njob: Job
\n
\n
\n
\nclassmethod create(runId, namespace, name)
\n
\n
Parameters:
\n
    \n
  • runId (str)

  • \n
  • namespace (str)

  • \n
  • name (str)

  • \n
\n
\n
Return type:
\n

ParentRunFacet

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.generated.parent_run.Run(runId)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n

runId (str)

\n
\n
\n
\n
\nrunId: str
\n

The globally unique ID of the run associated with the job.

\n
\n
\n
\nrunid_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.processing_engine_run module

\n
\n
\nclass openlineage.client.generated.processing_engine_run.ProcessingEngineRunFacet(version, name=None, openlineageAdapterVersion=None, *, producer='')
\n

Bases: RunFacet

\n
\n
Parameters:
\n
    \n
  • version (str)

  • \n
  • name (str | None)

  • \n
  • openlineageAdapterVersion (str | None)

  • \n
  • producer (str)

  • \n
\n
\n
\n
\n
\nversion: str
\n

Processing engine version. Might be Airflow or Spark version.

\n
\n
\n
\nname: str | None
\n

Processing engine name, e.g. Airflow or Spark

\n
\n
\n
\nopenlineageAdapterVersion: str | None
\n

OpenLineage adapter package version. Might be e.g. OpenLineage Airflow integration package version

\n
\n
\n
\n
\n

openlineage.client.generated.schema_dataset module

\n
\n
\nclass openlineage.client.generated.schema_dataset.SchemaDatasetFacet(fields=_Nothing.NOTHING, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • fields (list[SchemaDatasetFacetFields] | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nfields: list[SchemaDatasetFacetFields] | None
\n

The fields of the data source.

\n
\n
\n
\n
\nclass openlineage.client.generated.schema_dataset.SchemaDatasetFacetFields(name, type=None, description=None, fields=_Nothing.NOTHING)
\n

Bases: RedactMixin

\n
\n
Parameters:
\n
    \n
  • name (str)

  • \n
  • type (str | None)

  • \n
  • description (str | None)

  • \n
  • fields (list[SchemaDatasetFacetFields] | None)

  • \n
\n
\n
\n
\n
\nname: str
\n

The name of the field.

\n
\n
\n
\ntype: str | None
\n

The type of the field.

\n
\n
\n
\ndescription: str | None
\n

The description of the field.

\n
\n
\n
\nfields: list[SchemaDatasetFacetFields] | None
\n

Nested struct fields.

\n
\n
\n
\n
\n

openlineage.client.generated.source_code_job module

\n
\n
\nclass openlineage.client.generated.source_code_job.SourceCodeJobFacet(language, sourceCode, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • language (str)

  • \n
  • sourceCode (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nlanguage: str
\n

Language in which source code of this job was written.

\n
\n
\n
\nsourceCode: str
\n

Source code of this job.

\n
\n
\n
\n
\n

openlineage.client.generated.source_code_location_job module

\n
\n
\nclass openlineage.client.generated.source_code_location_job.SourceCodeLocationJobFacet(type, url, repoUrl=None, path=None, version=None, tag=None, branch=None, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • type (str)

  • \n
  • url (str)

  • \n
  • repoUrl (str | None)

  • \n
  • path (str | None)

  • \n
  • version (str | None)

  • \n
  • tag (str | None)

  • \n
  • branch (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\ntype: str
\n

the source control system

\n
\n
\n
\nurl: str
\n

the full http URL to locate the file

\n
\n
\n
\nrepoUrl: str | None
\n

the URL to the repository

\n
\n
\n
\npath: str | None
\n

the path in the repo containing the source files

\n
\n
\n
\nversion: str | None
\n

the current version deployed (not a branch name, the actual unique version)

\n
\n
\n
\ntag: str | None
\n

optional tag name

\n
\n
\n
\nbranch: str | None
\n

optional branch name

\n
\n
\n
\nurl_check(attribute, value)
\n
\n
Parameters:
\n
    \n
  • attribute (str)

  • \n
  • value (str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.generated.sql_job module

\n
\n
\nclass openlineage.client.generated.sql_job.SQLJobFacet(query, *, producer='', deleted=None)
\n

Bases: JobFacet

\n
\n
Parameters:
\n
    \n
  • query (str)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nquery: str
\n
\n
\n
\n
\n

openlineage.client.generated.storage_dataset module

\n
\n
\nclass openlineage.client.generated.storage_dataset.StorageDatasetFacet(storageLayer, fileFormat=None, *, producer='', deleted=None)
\n

Bases: DatasetFacet

\n
\n
Parameters:
\n
    \n
  • storageLayer (str)

  • \n
  • fileFormat (str | None)

  • \n
  • producer (str)

  • \n
  • deleted (bool | None)

  • \n
\n
\n
\n
\n
\nstorageLayer: str
\n

iceberg, delta.

\n
\n
Type:
\n

Storage layer provider with allowed values

\n
\n
\n
\n
\n
\nfileFormat: str | None
\n

parquet, orc, avro, json, csv, text, xml.

\n
\n
Type:
\n

File format with allowed values

\n
\n
\n
\n
\n
\n\n
\n

openlineage.client.transport.composite module

\n
\n
\nclass openlineage.client.transport.composite.CompositeConfig(transports, continue_on_failure=True)
\n

Bases: Config

\n

CompositeConfig is a configuration class for CompositeTransport.

\n
\n
Parameters:
\n
    \n
  • transports (list[dict[str, Any]] | dict[str, dict[str, Any]])

  • \n
  • continue_on_failure (bool)

  • \n
\n
\n
\n
\n
\ntransports
\n

A list of dictionaries, where each dictionary represents the configuration\nfor a child transport. Each dictionary should contain the necessary parameters\nto initialize a specific transport instance.

\n
\n
\n
\ncontinue_on_failure
\n

If set to True, the CompositeTransport will attempt to emit the event using\nall configured transports, regardless of whether any previous transport\nin the list failed to emit the event. If set to False, an error in one\ntransport will halt the emission process for subsequent transports.

\n
\n
\n
\ntransports: list[dict[str, Any]] | dict[str, dict[str, Any]]
\n
\n
\n
\ncontinue_on_failure: bool
\n
\n
\n
\nclassmethod from_dict(params)
\n

Create a CompositeConfig object from a dictionary.

\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

CompositeConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.composite.CompositeTransport(config)
\n

Bases: Transport

\n

CompositeTransport is a transport class that emits events using multiple transports.

\n
\n
Parameters:
\n

config (CompositeConfig)

\n
\n
\n
\n
\nkind: str | None = 'composite'
\n
\n
\n
\nconfig_class
\n

alias of CompositeConfig

\n
\n
\n
\nproperty transports: list[Transport]
\n

Create and return a list of transports based on the config.

\n
\n
\n
\nemit(event)
\n

Emit an event using all transports in the config.

\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.console module

\n
\n
\nclass openlineage.client.transport.console.ConsoleConfig
\n

Bases: Config

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.console.ConsoleTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (ConsoleConfig)

\n
\n
\n
\n
\nkind: str | None = 'console'
\n
\n
\n
\nconfig_class
\n

alias of ConsoleConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.factory module

\n
\n
\nclass openlineage.client.transport.factory.DefaultTransportFactory
\n

Bases: TransportFactory

\n
\n
\n
\n
\nregister_transport(of_type, clazz)
\n
\n
Parameters:
\n
    \n
  • of_type (str)

  • \n
  • clazz (type[Transport] | str)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\ncreate(config=None)
\n

Initializes and returns a transport mechanism based on the provided configuration.

\n

If \u2018OPENLINEAGE_DISABLED\u2019 is set to \u2018true\u2019, a NoopTransport instance is returned,\neffectively disabling transport.\nIf a configuration dictionary is provided, transport specified by the config is initialized.\nIf no configuration is provided, the function defaults to a console-based transport, logging\na warning and printing events to the console.

\n
\n
Parameters:
\n

config (dict[str, str] | None)

\n
\n
Return type:
\n

Transport

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.file module

\n
\n
\nclass openlineage.client.transport.file.FileConfig(log_file_path, append=False)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • log_file_path (str)

  • \n
  • append (bool)

  • \n
\n
\n
\n
\n
\nlog_file_path: str
\n
\n
\n
\nappend: bool = False
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

FileConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.file.FileTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (FileConfig)

\n
\n
\n
\n
\nkind: str | None = 'file'
\n
\n
\n
\nconfig_class
\n

alias of FileConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.http module

\n
\n
\nclass openlineage.client.transport.http.TokenProvider(config)
\n

Bases: object

\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
\n
\n
\nget_bearer()
\n
\n
Return type:
\n

str | None

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpCompression(value)
\n

Bases: Enum

\n

An enumeration.

\n
\n
\nGZIP = 'gzip'
\n
\n
\n
\n
\nclass openlineage.client.transport.http.ApiKeyTokenProvider(config)
\n

Bases: TokenProvider

\n
\n
Parameters:
\n

config (dict[str, str])

\n
\n
\n
\n
\nget_bearer()
\n
\n
Return type:
\n

str | None

\n
\n
\n
\n
\n
\n
\nopenlineage.client.transport.http.create_token_provider(auth)
\n
\n
Parameters:
\n

auth (dict[str, str])

\n
\n
Return type:
\n

TokenProvider

\n
\n
\n
\n
\n
\nopenlineage.client.transport.http.get_session()
\n
\n
Return type:
\n

Session

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpConfig(url, endpoint='api/v1/lineage', timeout=5.0, verify=True, auth=_Nothing.NOTHING, compression=None, session=None, adapter=None, custom_headers=_Nothing.NOTHING)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • url (str)

  • \n
  • endpoint (str)

  • \n
  • timeout (float)

  • \n
  • verify (bool)

  • \n
  • auth (TokenProvider)

  • \n
  • compression (HttpCompression | None)

  • \n
  • session (Session | None)

  • \n
  • adapter (HTTPAdapter | None)

  • \n
  • custom_headers (dict[str, str])

  • \n
\n
\n
\n
\n
\nurl: str
\n
\n
\n
\nendpoint: str
\n
\n
\n
\ntimeout: float
\n
\n
\n
\nverify: bool
\n
\n
\n
\nauth: TokenProvider
\n
\n
\n
\ncompression: HttpCompression | None
\n
\n
\n
\nsession: Session | None
\n
\n
\n
\nadapter: HTTPAdapter | None
\n
\n
\n
\ncustom_headers: dict[str, str]
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

HttpConfig

\n
\n
\n
\n
\n
\nclassmethod from_options(url, options, session)
\n
\n
Parameters:
\n
    \n
  • url (str)

  • \n
  • options (OpenLineageClientOptions)

  • \n
  • session (Session | None)

  • \n
\n
\n
Return type:
\n

HttpConfig

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.http.HttpTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (HttpConfig)

\n
\n
\n
\n
\nkind: str | None = 'http'
\n
\n
\n
\nconfig_class
\n

alias of HttpConfig

\n
\n
\n
\nset_adapter(adapter)
\n
\n
Parameters:
\n

adapter (HTTPAdapter)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

Response

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.kafka module

\n
\n
\nclass openlineage.client.transport.kafka.KafkaConfig(config, topic, messageKey=None, flush=True)
\n

Bases: Config

\n
\n
Parameters:
\n
    \n
  • config (dict[str, str])

  • \n
  • topic (str)

  • \n
  • messageKey (str | None)

  • \n
  • flush (bool)

  • \n
\n
\n
\n
\n
\nconfig: dict[str, str]
\n
\n
\n
\ntopic: str
\n
\n
\n
\nmessageKey: str | None
\n
\n
\n
\nflush: bool
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\n
\nopenlineage.client.transport.kafka.on_delivery(err, msg)
\n
\n
Parameters:
\n
    \n
  • err (KafkaError)

  • \n
  • msg (Message)

  • \n
\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.kafka.KafkaTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (KafkaConfig)

\n
\n
\n
\n
\nkind: str | None = 'kafka'
\n
\n
\n
\nconfig_class
\n

alias of KafkaConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.msk_iam module

\n
\n
\nclass openlineage.client.transport.msk_iam.MSKIAMConfig(config, topic, messageKey=None, flush=True, region=None, aws_profile=None, role_arn=None, aws_debug_creds=False)
\n

Bases: KafkaConfig

\n
\n
Parameters:
\n
    \n
  • config (dict[str, str])

  • \n
  • topic (str)

  • \n
  • messageKey (str | None)

  • \n
  • flush (bool)

  • \n
  • region (str)

  • \n
  • aws_profile (None | str)

  • \n
  • role_arn (None | str)

  • \n
  • aws_debug_creds (bool)

  • \n
\n
\n
\n
\n
\nregion: str
\n
\n
\n
\naws_profile: None | str
\n
\n
\n
\nrole_arn: None | str
\n
\n
\n
\naws_debug_creds: bool
\n
\n
\n
\n
\nclass openlineage.client.transport.msk_iam.MSKIAMTransport(config)
\n

Bases: KafkaTransport

\n
\n
Parameters:
\n

config (MSKIAMConfig)

\n
\n
\n
\n
\nkind: str | None = 'msk-iam'
\n
\n
\n
\nconfig_class
\n

alias of MSKIAMConfig

\n
\n
\n
\n
\n

openlineage.client.transport.noop module

\n
\n
\nclass openlineage.client.transport.noop.NoopConfig
\n

Bases: Config

\n
\n
\n
\n
\n
\nclass openlineage.client.transport.noop.NoopTransport(config)
\n

Bases: Transport

\n
\n
Parameters:
\n

config (NoopConfig)

\n
\n
\n
\n
\nkind: str | None = 'noop'
\n
\n
\n
\nconfig_class
\n

alias of NoopConfig

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Union[RunEvent, DatasetEvent, JobEvent, RunEvent, DatasetEvent, JobEvent])

\n
\n
Return type:
\n

None

\n
\n
\n
\n
\n
\n
\n

openlineage.client.transport.transport module

\n

To implement custom Transport implement Config and Transport classes.

\n
\n
Transport needs to
    \n
  • specify class variable config that will point to Config class that Transport requires

  • \n
  • __init__ that will accept specified Config class instance

  • \n
  • implement emit method that will accept RunEvent

  • \n
\n
\n
\n

Config file is read and parameters there are passed to from_dict classmethod.\nThe config class can have more complex attributes, but needs to be able to\ninstantiate them in from_dict method.

\n

DefaultTransportFactory instantiates custom transports by looking at type field in\nclass config.

\n
\n
\nclass openlineage.client.transport.transport.Config
\n

Bases: object

\n
\n
\n
\n
\nclassmethod from_dict(params)
\n
\n
Parameters:
\n

params (dict[str, Any])

\n
\n
Return type:
\n

_T

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.transport.Transport
\n

Bases: object

\n
\n
\nkind: str | None = None
\n
\n
\n
\nname: str | None = None
\n
\n
\n
\nconfig_class
\n

alias of Config

\n
\n
\n
\nemit(event)
\n
\n
Parameters:
\n

event (Event)

\n
\n
Return type:
\n

Any

\n
\n
\n
\n
\n
\n
\nclass openlineage.client.transport.transport.TransportFactory
\n

Bases: object

\n
\n
\ncreate(config=None)
\n
\n
Parameters:
\n

config (dict[str, str] | None)

\n
\n
Return type:
\n

Transport

\n
\n
\n
\n
\n
\n
\n
"}}> diff --git a/docs/guides/spark.md b/docs/guides/spark.md index 261fc5f..387e088 100644 --- a/docs/guides/spark.md +++ b/docs/guides/spark.md @@ -13,7 +13,7 @@ This guide was developed using an **earlier version** of this integration and ma Adding OpenLineage to Spark is refreshingly uncomplicated, and this is thanks to Spark's SparkListener interface. OpenLineage integrates with Spark by implementing SparkListener and collecting information about jobs executed inside a Spark application. To activate the listener, add the following properties to your Spark configuration in your cluster's `spark-defaults.conf` file or, alternatively, add them to specific jobs on submission via the `spark-submit` command: ``` -spark.jars.packages io.openlineage:openlineage-spark:1.23.0 +spark.jars.packages io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}} spark.extraListeners io.openlineage.spark.agent.OpenLineageSparkListener ``` @@ -91,7 +91,7 @@ spark = (SparkSession.builder.master('local').appName('openlineage_spark_test') .config('spark.jars', ",".join(files)) # Install and set up the OpenLineage listener - .config('spark.jars.packages', 'io.openlineage:openlineage-spark:1.23.0') + .config('spark.jars.packages', 'io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}}') .config('spark.extraListeners', 'io.openlineage.spark.agent.OpenLineageSparkListener') .config('spark.openlineage.transport.url', 'http://marquez-api:5000') .config('spark.openlineage.transport.type', 'http') diff --git a/docs/integrations/spark/installation.md b/docs/integrations/spark/installation.md index 4b252ec..bf187f0 100644 --- a/docs/integrations/spark/installation.md +++ b/docs/integrations/spark/installation.md @@ -13,7 +13,7 @@ import TabItem from '@theme/TabItem'; The above necessitates a change in the artifact identifier for `io.openlineage:openlineage-spark`. After version `1.8.0`, the artifact identifier has been updated. For subsequent versions, utilize: -`io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:${OPENLINEAGE_SPARK_VERSION}`. +`io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:{{PREPROCESSOR:OPENLINEAGE_VERSION}}`. ::: To integrate OpenLineage Spark with your application, you can: @@ -39,7 +39,7 @@ For Maven, add the following to your `pom.xml`: io.openlineage openlineage-spark_${SCALA_BINARY_VERSION} - ${OPENLINEAGE_SPARK_VERSION} + {{PREPROCESSOR:OPENLINEAGE_VERSION}} ``` @@ -63,14 +63,14 @@ For Gradle, add this to your `build.gradle`: ```groovy -implementation("io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:${OPENLINEAGE_SPARK_VERSION}") +implementation("io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:{{PREPROCESSOR:OPENLINEAGE_VERSION}}") ``` ```groovy -implementation("io.openlineage:openlineage-spark:${OPENLINEAGE_SPARK_VERSION}") +implementation("io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}}") ``` @@ -100,7 +100,7 @@ if [ -z "$SPARK_HOME" ]; then exit 1 fi -OPENLINEAGE_SPARK_VERSION='1.9.0' # Example version +OPENLINEAGE_SPARK_VERSION='{{PREPROCESSOR:OPENLINEAGE_VERSION}}' SCALA_BINARY_VERSION='2.13' # Example Scala version ARTIFACT_ID="openlineage-spark_${SCALA_BINARY_VERSION}" JAR_NAME="${ARTIFACT_ID}-${OPENLINEAGE_SPARK_VERSION}.jar" @@ -172,7 +172,7 @@ This script demonstrate this process: ```bash #!/usr/bin/env bash -OPENLINEAGE_SPARK_VERSION='1.9.0' # Example version +OPENLINEAGE_SPARK_VERSION='{{PREPROCESSOR:OPENLINEAGE_VERSION}}' SCALA_BINARY_VERSION='2.13' # Example Scala version ARTIFACT_ID="openlineage-spark_${SCALA_BINARY_VERSION}" JAR_NAME="${ARTIFACT_ID}-${OPENLINEAGE_SPARK_VERSION}.jar" @@ -237,10 +237,10 @@ during runtime and adds it to the classpath of your Spark application. ```bash -OPENLINEAGE_SPARK_VERSION='1.9.0' # Example version +OPENLINEAGE_SPARK_VERSION='{{PREPROCESSOR:OPENLINEAGE_VERSION}}' SCALA_BINARY_VERSION='2.13' # Example Scala version -spark-submit --packages "io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:${OPENLINEAGE_SPARK_VERSION}" \ +spark-submit --packages "io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION}:{{PREPROCESSOR:OPENLINEAGE_VERSION}}" \ # ... other options ``` @@ -250,7 +250,7 @@ spark-submit --packages "io.openlineage:openlineage-spark_${SCALA_BINARY_VERSION ```bash OPENLINEAGE_SPARK_VERSION='1.8.0' # Example version -spark-submit --packages "io.openlineage:openlineage-spark::${OPENLINEAGE_SPARK_VERSION}" \ +spark-submit --packages "io.openlineage:openlineage-spark::{{PREPROCESSOR:OPENLINEAGE_VERSION}}" \ # ... other options ``` diff --git a/docs/integrations/spark/quickstart/quickstart_local.md b/docs/integrations/spark/quickstart/quickstart_local.md index 95e6a85..5cd41de 100644 --- a/docs/integrations/spark/quickstart/quickstart_local.md +++ b/docs/integrations/spark/quickstart/quickstart_local.md @@ -44,7 +44,7 @@ from pyspark.sql import SparkSession spark = (SparkSession.builder.master('local') .appName('sample_spark') .config('spark.extraListeners', 'io.openlineage.spark.agent.OpenLineageSparkListener') - .config('spark.jars.packages', 'io.openlineage:openlineage-spark:1.7.0') + .config('spark.jars.packages', 'io.openlineage:openlineage-spark:{{PREPROCESSOR:OPENLINEAGE_VERSION}}') .config('spark.openlineage.transport.type', 'console') .getOrCreate()) ``` diff --git a/docs/spec/producers.md b/docs/spec/producers.md index 1e00458..9e9f64f 100644 --- a/docs/spec/producers.md +++ b/docs/spec/producers.md @@ -10,4 +10,4 @@ This page could use some extra detail! You're welcome to contribute using the Ed The `_producer` value is included in an OpenLineage request as a way to know how the metadata was generated. It is a URI that links to a source code SHA or the location where a package can be found. -For example, this field is populated by many of the common integrations. For example, the dbt integration will set this value to `https://github.com/OpenLineage/OpenLineage/tree/{__version__}/integration/dbt` and the Python client will set it to `https://github.com/OpenLineage/OpenLineage/tree/{__version__}/client/python`. \ No newline at end of file +For example, this field is populated by many of the common integrations. For example, the dbt integration will set this value to `https://github.com/OpenLineage/OpenLineage/tree/{{PREPROCESSOR:OPENLINEAGE_VERSION}}/integration/dbt` and the Python client will set it to `https://github.com/OpenLineage/OpenLineage/tree/{{PREPROCESSOR:OPENLINEAGE_VERSION}}/client/python`. \ No newline at end of file diff --git a/docusaurus.config.js b/docusaurus.config.js index d0db47d..437bc95 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -2,151 +2,173 @@ // Note: type annotations allow type checking and IDEs autocompletion const prism = require('prism-react-renderer'); +const path = require('path'); +const fs = require('fs'); + +const getCurrentVersion = () => { + // The file is created when "docusaurus docs:version " is executed. + const versionsPath = path.resolve(__dirname, 'versions.json'); + if (fs.existsSync(versionsPath)) { + return require(versionsPath)[0]; + } else { + console.warn("The documentation is not versioned. Run \"docusaurus docs:version \"") + return ""; + } +} + +// Replaces every occurrence of "{{PREPROCESSOR:OPENLINEAGE_VERSION}}" with the current version of the documentation. +const openLineageVersionProvider = ({filePath, fileContent}) => { + return fileContent.replace(/{{PREPROCESSOR:OPENLINEAGE_VERSION}}/g, getCurrentVersion()); +}; const links = [ - { to: '/getting-started', label: 'Getting Started', position: 'left' }, - { to: '/resources', label: 'Resources', position: 'left' }, - { to: '/ecosystem', label: 'Ecosystem', position: 'left' }, - { to: '/community', label: 'Community', position: 'left' }, - { to: '/blog', label: 'Blog', position: 'left' }, - { to: '/docs', label: 'Docs', position: 'left' }, - { to: '/survey', label: 'Ecosystem Survey 2023', position: 'left' }, + {to: '/getting-started', label: 'Getting Started', position: 'left'}, + {to: '/resources', label: 'Resources', position: 'left'}, + {to: '/ecosystem', label: 'Ecosystem', position: 'left'}, + {to: '/community', label: 'Community', position: 'left'}, + {to: '/blog', label: 'Blog', position: 'left'}, + {to: '/docs', label: 'Docs', position: 'left'}, + {to: '/survey', label: 'Ecosystem Survey 2023', position: 'left'}, ] const linksSocial = [ - { href: 'https://fosstodon.org/@openlineage', label: 'Mastodon', rel: 'me' }, - { href: 'https://twitter.com/OpenLineage', label: 'Twitter' }, - { href: 'https://www.linkedin.com/groups/13927795/', label: 'LinkedIn'}, - { href: 'http://bit.ly/OpenLineageSlack', label: 'Slack' }, - { href: 'https://github.com/OpenLineage/OpenLineage', label: 'GitHub' } + {href: 'https://fosstodon.org/@openlineage', label: 'Mastodon', rel: 'me'}, + {href: 'https://twitter.com/OpenLineage', label: 'Twitter'}, + {href: 'https://www.linkedin.com/groups/13927795/', label: 'LinkedIn'}, + {href: 'http://bit.ly/OpenLineageSlack', label: 'Slack'}, + {href: 'https://github.com/OpenLineage/OpenLineage', label: 'GitHub'} ] /** @type {import('@docusaurus/types').Config} */ const config = { - title: 'OpenLineage', - tagline: 'OpenLineage', - url: 'https://openlineage.io', - baseUrl: '/', - onBrokenLinks: 'throw', - onBrokenMarkdownLinks: 'throw', - favicon: 'img/favicon.ico', - customFields: { - links: links, - linksSocial: linksSocial - }, + title: 'OpenLineage', + tagline: 'OpenLineage', + url: 'https://openlineage.io', + baseUrl: '/', + onBrokenLinks: 'throw', + onBrokenMarkdownLinks: 'throw', + favicon: 'img/favicon.ico', + customFields: { + links: links, + linksSocial: linksSocial + }, - organizationName: 'openlineage', - projectName: 'docs', - i18n: { - defaultLocale: 'en', - locales: ['en'], - }, + organizationName: 'openlineage', + projectName: 'docs', + i18n: { + defaultLocale: 'en', + locales: ['en'], + }, - presets: [ - [ - 'classic', - /** @type {import('@docusaurus/preset-classic').Options} */ - ({ - docs: { - sidebarPath: require.resolve('./sidebars.js'), - exclude: ['**/partials/**'], - editUrl: - 'https://github.com/OpenLineage/OpenLineage/tree/main/website/', - }, - theme: { - customCss: require.resolve('./src/css/custom.css'), - }, - blog: { - blogTitle: 'Blog', - blogDescription: 'Data lineage is the foundation for a new generation of powerful, context-aware data tools and best practices. OpenLineage enables consistent collection of lineage metadata, creating a deeper understanding of how data is produced and used.', - showReadingTime: true, - blogSidebarCount: 5, - blogSidebarTitle: 'Recent posts', - feedOptions: { - type: ['json'], - copyright: `Copyright © ${new Date().getFullYear()} The Linux Foundation®. All rights reserved.`, - }, - }, - pages: { - path: 'src/pages', - include: ['**/*.{js,jsx,ts,tsx,md,mdx}'], - exclude: [ - 'home.tsx', // this page served from plugin - '**/_*.{js,jsx,ts,tsx,md,mdx}', - '**/_*.{js,jsx,ts,tsx,md,mdx}', - '**/_*/**', - '**/*.test.{js,jsx,ts,tsx}', - '**/__tests__/**', - ], - mdxPageComponent: '@theme/MDXPage', - }, - gtag: { - trackingID: 'G-QMTWMLMX4M', - anonymizeIP: true, - }, - }), + presets: [ + [ + 'classic', + /** @type {import('@docusaurus/preset-classic').Options} */ + ({ + docs: { + sidebarPath: require.resolve('./sidebars.js'), + exclude: ['**/partials/**'], + editUrl: + 'https://github.com/OpenLineage/OpenLineage/tree/main/website/', + }, + theme: { + customCss: require.resolve('./src/css/custom.css'), + }, + blog: { + blogTitle: 'Blog', + blogDescription: 'Data lineage is the foundation for a new generation of powerful, context-aware data tools and best practices. OpenLineage enables consistent collection of lineage metadata, creating a deeper understanding of how data is produced and used.', + showReadingTime: true, + blogSidebarCount: 5, + blogSidebarTitle: 'Recent posts', + feedOptions: { + type: ['json'], + copyright: `Copyright © ${new Date().getFullYear()} The Linux Foundation®. All rights reserved.`, + }, + }, + pages: { + path: 'src/pages', + include: ['**/*.{js,jsx,ts,tsx,md,mdx}'], + exclude: [ + 'home.tsx', // this page served from plugin + '**/_*.{js,jsx,ts,tsx,md,mdx}', + '**/_*.{js,jsx,ts,tsx,md,mdx}', + '**/_*/**', + '**/*.test.{js,jsx,ts,tsx}', + '**/__tests__/**', + ], + mdxPageComponent: '@theme/MDXPage', + }, + gtag: { + trackingID: 'G-QMTWMLMX4M', + anonymizeIP: true, + }, + }), + ], ], - ], - plugins: [ - function tailwindcssPlugin(ctx, options) { - return { - name: "docusaurus-tailwindcss", - configurePostCss(postcssOptions) { - // Appends TailwindCSS and AutoPrefixer. - postcssOptions.plugins.push(require("tailwindcss")); - postcssOptions.plugins.push(require("autoprefixer")); - return postcssOptions; + plugins: [ + function tailwindcssPlugin(ctx, options) { + return { + name: "docusaurus-tailwindcss", + configurePostCss(postcssOptions) { + // Appends TailwindCSS and AutoPrefixer. + postcssOptions.plugins.push(require("tailwindcss")); + postcssOptions.plugins.push(require("autoprefixer")); + return postcssOptions; + }, + }; }, - }; - }, - [ - "./plugins/home-blog-plugin", - { - id: "blogs", - routeBasePath: "/", - path: "./blogs" - }, + [ + "./plugins/home-blog-plugin", + { + id: "blogs", + routeBasePath: "/", + path: "./blogs" + }, + ], + require.resolve('docusaurus-lunr-search') ], - require.resolve('docusaurus-lunr-search') - ], - themeConfig: + themeConfig: /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ - ({ - navbar: { - logo: { - alt: 'OpenLineage', - src: 'img/ol-logo.svg', + ({ + navbar: { + logo: { + alt: 'OpenLineage', + src: 'img/ol-logo.svg', + }, + items: [ + ...links, + { + href: 'https://github.com/OpenLineage/openlineage', + label: 'GitHub', + position: 'right', + } + ], + }, + prism: { + theme: prism.themes.github, + darkTheme: prism.themes.dracula, + additionalLanguages: ['java'], + }, + colorMode: { + defaultMode: 'light', + disableSwitch: true, + respectPrefersColorScheme: false, + }, + }), + + scripts: [ + { + src: 'https://plausible.io/js/script.js', + defer: true, + 'data-domain': 'openlineage.io', }, - items: [ - ...links, - { - href: 'https://github.com/OpenLineage/openlineage', - label: 'GitHub', - position: 'right', - } - ], - }, - prism: { - theme: prism.themes.github, - darkTheme: prism.themes.dracula, - additionalLanguages: ['java'], - }, - colorMode: { - defaultMode: 'light', - disableSwitch: true, - respectPrefersColorScheme: false, - }, - }), + ], - scripts: [ - { - src: 'https://plausible.io/js/script.js', - defer: true, - 'data-domain': 'openlineage.io', - }, - ], + markdown: { + preprocessor: openLineageVersionProvider + } }; module.exports = config;