From 13239f97c98618827a3731045f35b0bc05d817ff Mon Sep 17 00:00:00 2001
From: GitHub Action Website Snapshot <>
Date: Mon, 18 Nov 2024 07:07:14 +0000
Subject: [PATCH] Refreshing website content from main repo.

Source commit: https://github.com/OpenLineage/OpenLineage/commit/fbc00b34404e261878eb2824fad8fa5ebbaec7a6
---
 .../integrations/spark/configuration/usage.md | 72 ++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/docs/integrations/spark/configuration/usage.md b/docs/integrations/spark/configuration/usage.md
index f4f1d79..6ef8cfa 100644
--- a/docs/integrations/spark/configuration/usage.md
+++ b/docs/integrations/spark/configuration/usage.md
@@ -7,7 +7,7 @@ title: Usage
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-Configuring the OpenLineage Spark integration is straightforward. It uses built-in Spark configuration mechanisms.
+Configuring the OpenLineage Spark integration is straightforward. It uses built-in Spark configuration mechanisms. However, for **Databricks users**, special considerations are required to ensure compatibility and avoid breaking the Spark UI after a cluster shutdown.
 
 Your options are:
 
@@ -27,6 +27,10 @@ The setting `config("spark.extraListeners", "io.openlineage.spark.agent.OpenLine
 the integration ineffective.
 :::
 
+:::note Databricks
+For Databricks users, you must include `com.databricks.backend.daemon.driver.DBCEventLoggingListener` in addition to `io.openlineage.spark.agent.OpenLineageSparkListener` in the `spark.extraListeners` setting. Failure to do so will make the Spark UI inaccessible after a cluster shutdown.
+:::
+
 
 
@@ -50,6 +54,27 @@ object OpenLineageExample extends App {
   spark.stop()
 }
+
+// For Databricks
+import org.apache.spark.sql.SparkSession
+
+object OpenLineageExample extends App {
+  val spark = SparkSession.builder()
+    .appName("OpenLineageExample")
+    // This line is EXTREMELY important
+    .config("spark.extraListeners", "io.openlineage.spark.agent.OpenLineageSparkListener,com.databricks.backend.daemon.driver.DBCEventLoggingListener")
+    .config("spark.openlineage.transport.type", "http")
+    .config("spark.openlineage.transport.url", "http://localhost:5000")
+    .config("spark.openlineage.namespace", "spark_namespace")
+    .config("spark.openlineage.parentJobNamespace", "airflow_namespace")
+    .config("spark.openlineage.parentJobName", "airflow_dag.airflow_task")
+    .config("spark.openlineage.parentRunId", "xxxx-xxxx-xxxx-xxxx")
+    .getOrCreate()
+
+  // ... your code
+
+  spark.stop()
+}
 ```
 
@@ -71,6 +96,24 @@ spark = SparkSession.builder
 
 # ... your code
 
+spark.stop()
+
+# For Databricks
+from pyspark.sql import SparkSession
+
+spark = SparkSession.builder \
+    .appName("OpenLineageExample") \
+    .config("spark.extraListeners", "io.openlineage.spark.agent.OpenLineageSparkListener,com.databricks.backend.daemon.driver.DBCEventLoggingListener") \
+    .config("spark.openlineage.transport.type", "http") \
+    .config("spark.openlineage.transport.url", "http://localhost:5000") \
+    .config("spark.openlineage.namespace", "spark_namespace") \
+    .config("spark.openlineage.parentJobNamespace", "airflow_namespace") \
+    .config("spark.openlineage.parentJobName", "airflow_dag.airflow_task") \
+    .config("spark.openlineage.parentRunId", "xxxx-xxxx-xxxx-xxxx") \
+    .getOrCreate()
+
+# ... your code
+
 spark.stop()
 ```
 
@@ -81,6 +124,10 @@ spark.stop()
 
 The below example demonstrates how to use the `--conf` option with `spark-submit`.
 
+:::note Databricks
+Remember to include `com.databricks.backend.daemon.driver.DBCEventLoggingListener` along with the OpenLineage listener.
+:::
+
 ```bash
 spark-submit \
   --conf "spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener" \
@@ -91,6 +138,17 @@ spark-submit \
   --conf "spark.openlineage.parentJobName=airflow_dag.airflow_task" \
   --conf "spark.openlineage.parentRunId=xxxx-xxxx-xxxx-xxxx" \
   # ... other options
+
+# For Databricks
+spark-submit \
+  --conf "spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener,com.databricks.backend.daemon.driver.DBCEventLoggingListener" \
+  --conf "spark.openlineage.transport.type=http" \
+  --conf "spark.openlineage.transport.url=http://localhost:5000" \
+  --conf "spark.openlineage.namespace=spark_namespace" \
+  --conf "spark.openlineage.parentJobNamespace=airflow_namespace" \
+  --conf "spark.openlineage.parentJobName=airflow_dag.airflow_task" \
+  --conf "spark.openlineage.parentRunId=xxxx-xxxx-xxxx-xxxx" \
+  # ... other options
 ```
 
 #### Adding properties to the `spark-defaults.conf` file in the `${SPARK_HOME}/conf` directory
@@ -104,6 +162,10 @@ installation, particularly in a shared environment.
 
 The below example demonstrates how to add properties to the `spark-defaults.conf` file.
 
+:::note Databricks
+For Databricks users, include `com.databricks.backend.daemon.driver.DBCEventLoggingListener` in the `spark.extraListeners` property.
+:::
+
 ```properties
 spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener
 spark.openlineage.transport.type=http
@@ -111,6 +173,14 @@ spark.openlineage.transport.url=http://localhost:5000
 spark.openlineage.namespace=MyNamespace
 ```
 
+For Databricks:
+```properties
+spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener,com.databricks.backend.daemon.driver.DBCEventLoggingListener
+spark.openlineage.transport.type=http
+spark.openlineage.transport.url=http://localhost:5000
+spark.openlineage.namespace=MyNamespace
+```
+
 :::info
 The `spark.extraListeners` configuration parameter is **non-additive**. This means that if you set
 `spark.extraListeners` via the CLI or via `SparkSession#config`, it will **replace** the value