diff --git a/.github/workflows/publish-snapshot.yml b/.github/workflows/publish-snapshot.yml
index c2809cfdcbac..0d7147b24a67 100644
--- a/.github/workflows/publish-snapshot.yml
+++ b/.github/workflows/publish-snapshot.yml
@@ -41,4 +41,4 @@ jobs:
- run: |
./gradlew printVersion
./gradlew -DallVersions publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
- ./gradlew -DflinkVersions= -DsparkVersions=3.2,3.3,3.4,3.5 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
+ ./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml
index 3dacdf77c1a8..c77f95fe7aa3 100644
--- a/.github/workflows/spark-ci.yml
+++ b/.github/workflows/spark-ci.yml
@@ -58,7 +58,7 @@ jobs:
strategy:
matrix:
jvm: [8, 11]
- spark: ['3.2', '3.3', '3.4', '3.5']
+ spark: ['3.3', '3.4', '3.5']
env:
SPARK_LOCAL_IP: localhost
steps:
@@ -88,7 +88,7 @@ jobs:
strategy:
matrix:
jvm: [8, 11]
- spark: ['3.2','3.3','3.4','3.5']
+ spark: ['3.3','3.4','3.5']
env:
SPARK_LOCAL_IP: localhost
steps:
diff --git a/.gitignore b/.gitignore
index 6fb0b51d045c..23febc6ccf6b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,7 +27,6 @@ lib/
site/site
# benchmark output
-spark/v3.2/spark/benchmark/*
spark/v3.3/spark/benchmark/*
spark/v3.3/spark-extensions/benchmark/*
spark/v3.4/spark/benchmark/*
diff --git a/dev/stage-binaries.sh b/dev/stage-binaries.sh
index 5509a2ea3223..b7cd1a37ace9 100755
--- a/dev/stage-binaries.sh
+++ b/dev/stage-binaries.sh
@@ -20,14 +20,13 @@
SCALA_VERSION=2.12
FLINK_VERSIONS=1.16,1.17,1.18
-SPARK_VERSIONS=3.2,3.3,3.4,3.5
+SPARK_VERSIONS=3.3,3.4,3.5
HIVE_VERSIONS=2,3
./gradlew -Prelease -DscalaVersion=$SCALA_VERSION -DflinkVersions=$FLINK_VERSIONS -DsparkVersions=$SPARK_VERSIONS -DhiveVersions=$HIVE_VERSIONS publishApachePublicationToMavenRepository
# Also publish Scala 2.13 Artifacts for versions that support it.
# Flink does not yet support 2.13 (and is largely dropping a user-facing dependency on Scala). Hive doesn't need a Scala specification.
-./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.2 :iceberg-spark:iceberg-spark-3.2_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.2_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.2_2.13:publishApachePublicationToMavenRepository
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.3 :iceberg-spark:iceberg-spark-3.3_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.3_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.3_2.13:publishApachePublicationToMavenRepository
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.4 :iceberg-spark:iceberg-spark-3.4_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.4_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.4_2.13:publishApachePublicationToMavenRepository
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.5 :iceberg-spark:iceberg-spark-3.5_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.5_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.5_2.13:publishApachePublicationToMavenRepository
diff --git a/docs/dell.md b/docs/dell.md
index 401240ab294d..2560332e5bed 100644
--- a/docs/dell.md
+++ b/docs/dell.md
@@ -58,11 +58,11 @@ Even though the [Dell ECS client](https://github.com/EMCECS/ecs-object-client-ja
### Spark
-To use the Dell ECS catalog with Spark 3.2.1, you should create a Spark session like:
+To use the Dell ECS catalog with Spark 3.5.0, you should create a Spark session like:
```bash
-ICEBERG_VERSION=0.15.0
-SPARK_VERSION=3.2_2.12
+ICEBERG_VERSION=1.4.2
+SPARK_VERSION=3.5_2.12
ECS_CLIENT_VERSION=3.3.2
DEPENDENCIES="org.apache.iceberg:iceberg-spark-runtime-${SPARK_VERSION}:${ICEBERG_VERSION},\
diff --git a/docs/jdbc.md b/docs/jdbc.md
index f7f70043d2c2..40ee049759be 100644
--- a/docs/jdbc.md
+++ b/docs/jdbc.md
@@ -50,7 +50,7 @@ the JDBC catalog allows arbitrary configurations through:
You can start a Spark session with a MySQL JDBC connection using the following configurations:
```shell
-spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} \
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{% icebergVersion %}} \
--conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket/my/key/prefix \
--conf spark.sql.catalog.my_catalog.catalog-impl=org.apache.iceberg.jdbc.JdbcCatalog \
diff --git a/docs/spark-getting-started.md b/docs/spark-getting-started.md
index 2181712c9f33..5789d5b704d2 100644
--- a/docs/spark-getting-started.md
+++ b/docs/spark-getting-started.md
@@ -40,11 +40,11 @@ You can also view documentations of using Iceberg with other compute engine unde
To use Iceberg in a Spark shell, use the `--packages` option:
```sh
-spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}
+spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{% icebergVersion %}}
```
{{< hint info >}}
-If you want to include Iceberg in your Spark installation, add the [`iceberg-spark-runtime-3.2_2.12` Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{% icebergVersion %}}/iceberg-spark-runtime-3.2_2.12-{{% icebergVersion %}}.jar) to Spark's `jars` folder.
+If you want to include Iceberg in your Spark installation, add the [`iceberg-spark-runtime-3.5_2.12` Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{% icebergVersion %}}/iceberg-spark-runtime-3.5_2.12-{{% icebergVersion %}}.jar) to Spark's `jars` folder.
{{< /hint >}}
### Adding catalogs
@@ -54,7 +54,7 @@ Iceberg comes with [catalogs](../spark-configuration#catalogs) that enable SQL c
This command creates a path-based catalog named `local` for tables under `$PWD/warehouse` and adds support for Iceberg tables to Spark's built-in catalog:
```sh
-spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}\
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{% icebergVersion %}}\
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
--conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
--conf spark.sql.catalog.spark_catalog.type=hive \
diff --git a/docs/spark-procedures.md b/docs/spark-procedures.md
index 5abf947f2141..32916a2df677 100644
--- a/docs/spark-procedures.md
+++ b/docs/spark-procedures.md
@@ -354,7 +354,7 @@ Iceberg can compact data files in parallel using Spark with the `rewriteDataFile
|---------------|-----------|------|-------------|
| `table` | ✔️ | string | Name of the table to update |
| `strategy` | | string | Name of the strategy - binpack or sort. Defaults to binpack strategy |
-| `sort_order` | | string | For Zorder use a comma separated list of columns within zorder(). (Supported in Spark 3.2 and Above) Example: zorder(c1,c2,c3). Else, Comma separated sort orders in the format (ColumnName SortDirection NullOrder). Where SortDirection can be ASC or DESC. NullOrder can be NULLS FIRST or NULLS LAST. Defaults to the table's sort order |
+| `sort_order` | | string | For Zorder, use a comma-separated list of columns within zorder(). Example: zorder(c1,c2,c3). Otherwise, use comma-separated sort orders in the format (ColumnName SortDirection NullOrder), where SortDirection can be ASC or DESC and NullOrder can be NULLS FIRST or NULLS LAST. Defaults to the table's sort order |
| `options` | ️ | map | Options to be used for actions|
| `where` | ️ | string | predicate as a string used for filtering the files. Note that all files that may contain data matching the filter will be selected for rewriting|
diff --git a/docs/spark-queries.md b/docs/spark-queries.md
index 38d989f003f6..c55d70e9e4b8 100644
--- a/docs/spark-queries.md
+++ b/docs/spark-queries.md
@@ -206,10 +206,6 @@ To inspect a table's history, snapshots, and other metadata, Iceberg supports me
Metadata tables are identified by adding the metadata table name after the original table name. For example, history for `db.table` is read using `db.table.history`.
-{{< hint info >}}
-For Spark 3, prior to 3.2, the Spark [session catalog](../spark-configuration#replacing-the-session-catalog) does not support table names with multipart identifiers such as `catalog.database.table.metadata`. As a workaround, configure an `org.apache.iceberg.spark.SparkCatalog`, or use the Spark `DataFrameReader` API.
-{{< /hint >}}
-
### History
To show table history:
diff --git a/gradle.properties b/gradle.properties
index 84644da7c4ba..ea857e7f275e 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -21,7 +21,7 @@ systemProp.knownFlinkVersions=1.16,1.17,1.18
systemProp.defaultHiveVersions=2
systemProp.knownHiveVersions=2,3
systemProp.defaultSparkVersions=3.5
-systemProp.knownSparkVersions=3.2,3.3,3.4,3.5
+systemProp.knownSparkVersions=3.3,3.4,3.5
systemProp.defaultScalaVersion=2.12
systemProp.knownScalaVersions=2.12,2.13
org.gradle.parallel=true
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 47dc322a5983..6dc67ce090bf 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -76,7 +76,6 @@ s3mock-junit5 = "2.11.0"
scala-collection-compat = "2.11.0"
slf4j = "1.7.36"
snowflake-jdbc = "3.14.4"
-spark-hive32 = "3.2.2"
spark-hive33 = "3.3.3"
spark-hive34 = "3.4.2"
spark-hive35 = "3.5.0"
diff --git a/jmh.gradle b/jmh.gradle
index aa60b93b331d..1a28ee0083e1 100644
--- a/jmh.gradle
+++ b/jmh.gradle
@@ -25,10 +25,6 @@ def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getPro
def scalaVersion = System.getProperty("scalaVersion") != null ? System.getProperty("scalaVersion") : System.getProperty("defaultScalaVersion")
def jmhProjects = [project(":iceberg-core")]
-if (sparkVersions.contains("3.2")) {
- jmhProjects.add(project(":iceberg-spark:iceberg-spark-3.2_${scalaVersion}"))
-}
-
if (sparkVersions.contains("3.3")) {
jmhProjects.add(project(":iceberg-spark:iceberg-spark-3.3_${scalaVersion}"))
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-3.3_${scalaVersion}"))
diff --git a/settings.gradle b/settings.gradle
index a487fe1ffdac..d2c64da78a36 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -133,18 +133,6 @@ if (flinkVersions.contains("1.18")) {
project(":iceberg-flink:flink-runtime-1.18").name = "iceberg-flink-runtime-1.18"
}
-if (sparkVersions.contains("3.2")) {
- include ":iceberg-spark:spark-3.2_${scalaVersion}"
- include ":iceberg-spark:spark-extensions-3.2_${scalaVersion}"
- include ":iceberg-spark:spark-runtime-3.2_${scalaVersion}"
- project(":iceberg-spark:spark-3.2_${scalaVersion}").projectDir = file('spark/v3.2/spark')
- project(":iceberg-spark:spark-3.2_${scalaVersion}").name = "iceberg-spark-3.2_${scalaVersion}"
- project(":iceberg-spark:spark-extensions-3.2_${scalaVersion}").projectDir = file('spark/v3.2/spark-extensions')
- project(":iceberg-spark:spark-extensions-3.2_${scalaVersion}").name = "iceberg-spark-extensions-3.2_${scalaVersion}"
- project(":iceberg-spark:spark-runtime-3.2_${scalaVersion}").projectDir = file('spark/v3.2/spark-runtime')
- project(":iceberg-spark:spark-runtime-3.2_${scalaVersion}").name = "iceberg-spark-runtime-3.2_${scalaVersion}"
-}
-
if (sparkVersions.contains("3.3")) {
include ":iceberg-spark:spark-3.3_${scalaVersion}"
include ":iceberg-spark:spark-extensions-3.3_${scalaVersion}"
diff --git a/site/docs/contribute.md b/site/docs/contribute.md
index 3e0cae347022..fdc1ef46d8a7 100644
--- a/site/docs/contribute.md
+++ b/site/docs/contribute.md
@@ -53,7 +53,7 @@ Iceberg is built using Gradle with Java 8 or Java 11.
* To invoke a build and run tests: `./gradlew build`
* To skip tests: `./gradlew build -x test -x integrationTest`
* To fix code style: `./gradlew spotlessApply`
-* To build particular Spark/Flink Versions: `./gradlew build -DsparkVersions=3.2,3.3 -DflinkVersions=1.14`
+* To build particular Spark/Flink versions: `./gradlew build -DsparkVersions=3.4,3.5 -DflinkVersions=1.18`
Iceberg table support is organized in library modules:
diff --git a/site/docs/docs/nightly/docs/dell.md b/site/docs/docs/nightly/docs/dell.md
index a99d8d044d61..f04e5d6eeabf 100644
--- a/site/docs/docs/nightly/docs/dell.md
+++ b/site/docs/docs/nightly/docs/dell.md
@@ -52,11 +52,11 @@ Even though the [Dell ECS client](https://github.com/EMCECS/ecs-object-client-ja
### Spark
-To use the Dell ECS catalog with Spark 3.2.1, you should create a Spark session like:
+To use the Dell ECS catalog with Spark 3.5.0, you should create a Spark session like:
```bash
-ICEBERG_VERSION=0.15.0
-SPARK_VERSION=3.2_2.12
+ICEBERG_VERSION=1.4.2
+SPARK_VERSION=3.5_2.12
ECS_CLIENT_VERSION=3.3.2
DEPENDENCIES="org.apache.iceberg:iceberg-spark-runtime-${SPARK_VERSION}:${ICEBERG_VERSION},\
diff --git a/site/docs/docs/nightly/docs/jdbc.md b/site/docs/docs/nightly/docs/jdbc.md
index 7b525fb7ee77..e4dd38a9a1dc 100644
--- a/site/docs/docs/nightly/docs/jdbc.md
+++ b/site/docs/docs/nightly/docs/jdbc.md
@@ -44,7 +44,7 @@ the JDBC catalog allows arbitrary configurations through:
You can start a Spark session with a MySQL JDBC connection using the following configurations:
```shell
-spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }} \
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }} \
--conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket/my/key/prefix \
--conf spark.sql.catalog.my_catalog.catalog-impl=org.apache.iceberg.jdbc.JdbcCatalog \
diff --git a/site/docs/docs/nightly/docs/spark-getting-started.md b/site/docs/docs/nightly/docs/spark-getting-started.md
index fdad0671f7e4..d9dd70e4fa33 100644
--- a/site/docs/docs/nightly/docs/spark-getting-started.md
+++ b/site/docs/docs/nightly/docs/spark-getting-started.md
@@ -31,11 +31,11 @@ You can also view documentations of using Iceberg with other compute engine unde
To use Iceberg in a Spark shell, use the `--packages` option:
```sh
-spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}
+spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}
```
!!! info
- If you want to include Iceberg in your Spark installation, add the [`iceberg-spark-runtime-3.2_2.12` Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) to Spark's `jars` folder.
+ If you want to include Iceberg in your Spark installation, add the [`iceberg-spark-runtime-3.5_2.12` Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.12-{{ icebergVersion }}.jar) to Spark's `jars` folder.
### Adding catalogs
@@ -45,7 +45,7 @@ Iceberg comes with [catalogs](spark-configuration.md#catalogs) that enable SQL c
This command creates a path-based catalog named `local` for tables under `$PWD/warehouse` and adds support for Iceberg tables to Spark's built-in catalog:
```sh
-spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}\
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}\
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
--conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
--conf spark.sql.catalog.spark_catalog.type=hive \
diff --git a/site/docs/docs/nightly/docs/spark-procedures.md b/site/docs/docs/nightly/docs/spark-procedures.md
index 645865311c86..d9a1ca65ec15 100644
--- a/site/docs/docs/nightly/docs/spark-procedures.md
+++ b/site/docs/docs/nightly/docs/spark-procedures.md
@@ -343,7 +343,7 @@ Iceberg can compact data files in parallel using Spark with the `rewriteDataFile
|---------------|-----------|------|-------------|
| `table` | ✔️ | string | Name of the table to update |
| `strategy` | | string | Name of the strategy - binpack or sort. Defaults to binpack strategy |
-| `sort_order` | | string | For Zorder use a comma separated list of columns within zorder(). (Supported in Spark 3.2 and Above) Example: zorder(c1,c2,c3). Else, Comma separated sort orders in the format (ColumnName SortDirection NullOrder). Where SortDirection can be ASC or DESC. NullOrder can be NULLS FIRST or NULLS LAST. Defaults to the table's sort order |
+| `sort_order` | | string | For Zorder, use a comma-separated list of columns within zorder(). Example: zorder(c1,c2,c3). Otherwise, use comma-separated sort orders in the format (ColumnName SortDirection NullOrder), where SortDirection can be ASC or DESC and NullOrder can be NULLS FIRST or NULLS LAST. Defaults to the table's sort order |
| `options` | ️ | map | Options to be used for actions|
| `where` | ️ | string | predicate as a string used for filtering the files. Note that all files that may contain data matching the filter will be selected for rewriting|
diff --git a/site/docs/docs/nightly/docs/spark-queries.md b/site/docs/docs/nightly/docs/spark-queries.md
index 9198538f5a73..2616687cccd0 100644
--- a/site/docs/docs/nightly/docs/spark-queries.md
+++ b/site/docs/docs/nightly/docs/spark-queries.md
@@ -188,9 +188,6 @@ To inspect a table's history, snapshots, and other metadata, Iceberg supports me
Metadata tables are identified by adding the metadata table name after the original table name. For example, history for `db.table` is read using `db.table.history`.
-!!! info
- For Spark 3, prior to 3.2, the Spark [session catalog](spark-configuration.md#replacing-the-session-catalog) does not support table names with multipart identifiers such as `catalog.database.table.metadata`. As a workaround, configure an `org.apache.iceberg.spark.SparkCatalog`, or use the Spark `DataFrameReader` API.
-
### History
diff --git a/site/docs/multi-engine-support.md b/site/docs/multi-engine-support.md
index 75477ffc7106..20fd10616754 100644
--- a/site/docs/multi-engine-support.md
+++ b/site/docs/multi-engine-support.md
@@ -29,8 +29,8 @@ Connectors for Spark, Flink and Hive are maintained in the main Iceberg reposito
Processing engine connectors maintained in the iceberg repository are built for multiple versions.
For Spark and Flink, each new version that introduces backwards incompatible upgrade has its dedicated integration codebase and release artifacts.
-For example, the code for Iceberg Spark 3.1 integration is under `/spark/v3.1` and the code for Iceberg Spark 3.2 integration is under `/spark/v3.2`.
-Different artifacts (`iceberg-spark-3.1_2.12` and `iceberg-spark-3.2_2.12`) are released for users to consume.
+For example, the code for Iceberg Spark 3.4 integration is under `/spark/v3.4` and the code for Iceberg Spark 3.5 integration is under `/spark/v3.5`.
+Different artifacts (`iceberg-spark-3.4_2.12` and `iceberg-spark-3.5_2.12`) are released for users to consume.
By doing this, changes across versions are isolated.
New features in Iceberg could be developed against the latest features of an engine without breaking support of old APIs in past engine versions.
@@ -40,7 +40,7 @@ For Hive, Hive 2 uses the `iceberg-mr` package for Iceberg integration, and Hive
Iceberg provides a runtime connector jar for each supported version of Spark, Flink and Hive.
When using Iceberg with these engines, the runtime jar is the only addition to the classpath needed in addition to vendor dependencies.
-For example, to use Iceberg with Spark 3.2 and AWS integrations, `iceberg-spark-runtime-3.2_2.12` and AWS SDK dependencies are needed for the Spark installation.
+For example, to use Iceberg with Spark 3.5 and AWS integrations, `iceberg-spark-runtime-3.5_2.12` and AWS SDK dependencies are needed for the Spark installation.
Spark and Flink provide different runtime jars for each supported engine version.
Hive 2 and Hive 3 currently share the same runtime jar.
diff --git a/site/docs/spark-quickstart.md b/site/docs/spark-quickstart.md
index cd5efdb44c00..9601bcbdb0f8 100644
--- a/site/docs/spark-quickstart.md
+++ b/site/docs/spark-quickstart.md
@@ -281,7 +281,7 @@ This configuration creates a path-based catalog named `local` for tables under `
=== "CLI"
```sh
- spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}\
+ spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}\
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
--conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
--conf spark.sql.catalog.spark_catalog.type=hive \
@@ -294,7 +294,7 @@ This configuration creates a path-based catalog named `local` for tables under `
=== "spark-defaults.conf"
```sh
- spark.jars.packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}
+ spark.jars.packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}
spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
spark.sql.catalog.spark_catalog org.apache.iceberg.spark.SparkSessionCatalog
spark.sql.catalog.spark_catalog.type hive
@@ -316,26 +316,26 @@ If you already have a Spark environment, you can add Iceberg, using the `--packa
=== "SparkSQL"
```sh
- spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}
+ spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}
```
=== "Spark-Shell"
```sh
- spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}
+ spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}
```
=== "PySpark"
```sh
- pyspark --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{ icebergVersion }}
+ pyspark --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}
```
!!! note
If you want to include Iceberg in your Spark installation, add the Iceberg Spark runtime to Spark's `jars` folder.
You can download the runtime by visiting to the [Releases](releases.md) page.
-[spark-runtime-jar]: https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar
+[spark-runtime-jar]: https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.12-{{ icebergVersion }}.jar
#### Learn More
diff --git a/spark/build.gradle b/spark/build.gradle
index 736c47e335ff..c2bc5f8a14ed 100644
--- a/spark/build.gradle
+++ b/spark/build.gradle
@@ -20,10 +20,6 @@
// add enabled Spark version modules to the build
def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getProperty("sparkVersions") : System.getProperty("defaultSparkVersions")).split(",")
-if (sparkVersions.contains("3.2")) {
- apply from: file("$projectDir/v3.2/build.gradle")
-}
-
if (sparkVersions.contains("3.3")) {
apply from: file("$projectDir/v3.3/build.gradle")
}
diff --git a/spark/v3.2/build.gradle b/spark/v3.2/build.gradle
deleted file mode 100644
index d2e2ceaf266d..000000000000
--- a/spark/v3.2/build.gradle
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-String sparkMajorVersion = '3.2'
-String scalaVersion = System.getProperty("scalaVersion") != null ? System.getProperty("scalaVersion") : System.getProperty("defaultScalaVersion")
-
-def sparkProjects = [
- project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}"),
- project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}"),
- project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}"),
-]
-
-configure(sparkProjects) {
- configurations {
- all {
- resolutionStrategy {
- force "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}:${libs.versions.jackson212.get()}"
- force "com.fasterxml.jackson.core:jackson-paranamer:${libs.versions.jackson212.get()}"
- force "com.fasterxml.jackson.core:jackson-databind:${libs.versions.jackson212.get()}"
- force "com.fasterxml.jackson.core:jackson-core:${libs.versions.jackson212.get()}"
- }
- }
- }
-}
-
-project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
- apply plugin: 'scala'
- apply plugin: 'com.github.alisiikh.scalastyle'
-
- sourceSets {
- main {
- scala.srcDirs = ['src/main/scala', 'src/main/java']
- java.srcDirs = []
- }
- }
-
- dependencies {
- implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
- api project(':iceberg-api')
- implementation project(':iceberg-common')
- implementation project(':iceberg-core')
- implementation project(':iceberg-data')
- implementation project(':iceberg-orc')
- implementation project(':iceberg-parquet')
- implementation(project(':iceberg-arrow')) {
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- }
- implementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}"
-
- compileOnly libs.errorprone.annotations
- compileOnly libs.avro.avro
- compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark.hive32.get()}") {
- exclude group: 'org.apache.avro', module: 'avro'
- exclude group: 'org.apache.arrow'
- exclude group: 'org.apache.parquet'
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- exclude group: 'org.roaringbitmap'
- }
-
- implementation libs.parquet.column
- implementation libs.parquet.hadoop
-
- implementation("${libs.orc.core.get().module}:${libs.versions.orc.get()}:nohive") {
- exclude group: 'org.apache.hadoop'
- exclude group: 'commons-lang'
- // These artifacts are shaded and included in the orc-core fat jar
- exclude group: 'com.google.protobuf', module: 'protobuf-java'
- exclude group: 'org.apache.hive', module: 'hive-storage-api'
- }
-
- implementation(libs.arrow.vector) {
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- exclude group: 'com.google.code.findbugs', module: 'jsr305'
- }
-
- // use netty-buffer compatible with Spark 3.2
- runtimeOnly libs.netty.buffer.compat
-
- testImplementation(libs.hadoop2.minicluster) {
- exclude group: 'org.apache.avro', module: 'avro'
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- }
- testImplementation project(path: ':iceberg-hive-metastore')
- testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
- testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
- testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
- testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
- testImplementation libs.sqlite.jdbc
- testImplementation libs.awaitility
- }
-
- tasks.withType(Test) {
- // Vectorized reads need more memory
- maxHeapSize '2560m'
- }
-}
-
-project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}") {
- apply plugin: 'java-library'
- apply plugin: 'scala'
- apply plugin: 'com.github.alisiikh.scalastyle'
- apply plugin: 'antlr'
-
- configurations {
- /*
- The Gradle Antlr plugin erroneously adds both antlr-build and runtime dependencies to the runtime path. This
- bug https://github.com/gradle/gradle/issues/820 exists because older versions of Antlr do not have separate
- runtime and implementation dependencies and they do not want to break backwards compatibility. So to only end up with
- the runtime dependency on the runtime classpath we remove the dependencies added by the plugin here. Then add
- the runtime dependency back to only the runtime configuration manually.
- */
- implementation {
- extendsFrom = extendsFrom.findAll { it != configurations.antlr }
- }
- }
-
- dependencies {
- implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
-
- compileOnly "org.scala-lang:scala-library"
- compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow')
- compileOnly project(':iceberg-api')
- compileOnly project(':iceberg-core')
- compileOnly project(':iceberg-common')
- compileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
- compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark.hive32.get()}") {
- exclude group: 'org.apache.avro', module: 'avro'
- exclude group: 'org.apache.arrow'
- exclude group: 'org.apache.parquet'
- exclude group: 'io.netty', module: 'netty-buffer'
- exclude group: 'io.netty', module: 'netty-common'
- exclude group: 'org.roaringbitmap'
- }
-
- testImplementation project(path: ':iceberg-data')
- testImplementation project(path: ':iceberg-parquet')
- testImplementation project(path: ':iceberg-hive-metastore')
- testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
- testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
- testImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
-
- testImplementation libs.avro.avro
- testImplementation libs.parquet.hadoop
-
- // Required because we remove antlr plugin dependencies from the compile configuration, see note above
- runtimeOnly libs.antlr.runtime
- antlr libs.antlr.antlr4
- }
-
- generateGrammarSource {
- maxHeapSize = "64m"
- arguments += ['-visitor', '-package', 'org.apache.spark.sql.catalyst.parser.extensions']
- }
-}
-
-project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}") {
- apply plugin: 'com.github.johnrengelman.shadow'
-
- tasks.jar.dependsOn tasks.shadowJar
-
- sourceSets {
- integration {
- java.srcDir "$projectDir/src/integration/java"
- resources.srcDir "$projectDir/src/integration/resources"
- }
- }
-
- configurations {
- implementation {
- exclude group: 'org.apache.spark'
- // included in Spark
- exclude group: 'org.slf4j'
- exclude group: 'org.apache.commons'
- exclude group: 'commons-pool'
- exclude group: 'commons-codec'
- exclude group: 'org.xerial.snappy'
- exclude group: 'javax.xml.bind'
- exclude group: 'javax.annotation'
- exclude group: 'com.github.luben'
- exclude group: 'com.ibm.icu'
- exclude group: 'org.glassfish'
- exclude group: 'org.abego.treelayout'
- exclude group: 'org.antlr'
- exclude group: 'org.scala-lang'
- exclude group: 'org.scala-lang.modules'
- }
- }
-
- dependencies {
- api project(':iceberg-api')
- implementation project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
- implementation project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
- implementation project(':iceberg-aws')
- implementation project(':iceberg-azure')
- implementation(project(':iceberg-aliyun')) {
- exclude group: 'edu.umd.cs.findbugs', module: 'findbugs'
- exclude group: 'org.apache.httpcomponents', module: 'httpclient'
- exclude group: 'commons-logging', module: 'commons-logging'
- }
- implementation project(':iceberg-gcp')
- implementation project(':iceberg-hive-metastore')
- implementation(project(':iceberg-nessie')) {
- exclude group: 'com.google.code.findbugs', module: 'jsr305'
- }
- implementation (project(':iceberg-snowflake')) {
- exclude group: 'net.snowflake' , module: 'snowflake-jdbc'
- }
-
- integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}"
- integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark.hive32.get()}"
- integrationImplementation libs.junit.vintage.engine
- integrationImplementation libs.slf4j.simple
- integrationImplementation libs.assertj.core
- integrationImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
- integrationImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
- integrationImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
- integrationImplementation project(path: ":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
- // Not allowed on our classpath, only the runtime jar is allowed
- integrationCompileOnly project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
- integrationCompileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
- integrationCompileOnly project(':iceberg-api')
- }
-
- shadowJar {
- configurations = [project.configurations.runtimeClasspath]
-
- zip64 true
-
- // include the LICENSE and NOTICE files for the shaded Jar
- from(projectDir) {
- include 'LICENSE'
- include 'NOTICE'
- }
-
- // Relocate dependencies to avoid conflicts
- relocate 'com.google.errorprone', 'org.apache.iceberg.shaded.com.google.errorprone'
- relocate 'com.google.flatbuffers', 'org.apache.iceberg.shaded.com.google.flatbuffers'
- relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
- relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes'
- relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework'
- relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro'
- relocate 'avro.shaded', 'org.apache.iceberg.shaded.org.apache.avro.shaded'
- relocate 'com.thoughtworks.paranamer', 'org.apache.iceberg.shaded.com.thoughtworks.paranamer'
- relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet'
- relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded'
- relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc'
- relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift'
- relocate 'org.apache.hc.client5', 'org.apache.iceberg.shaded.org.apache.hc.client5'
- relocate 'org.apache.hc.core5', 'org.apache.iceberg.shaded.org.apache.hc.core5'
- // relocate Arrow and related deps to shade Iceberg specific version
- relocate 'io.netty', 'org.apache.iceberg.shaded.io.netty'
- relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow'
- relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch'
- relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra'
- relocate 'org.roaringbitmap', 'org.apache.iceberg.shaded.org.roaringbitmap'
-
- archiveClassifier.set(null)
- }
-
- task integrationTest(type: Test) {
- description = "Test Spark3 Runtime Jar against Spark ${sparkMajorVersion}"
- group = "verification"
- jvmArgs += project.property('extraJvmArgs')
- testClassesDirs = sourceSets.integration.output.classesDirs
- classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
- inputs.file(shadowJar.archiveFile.get().asFile.path)
- }
- integrationTest.dependsOn shadowJar
- check.dependsOn integrationTest
-
- jar {
- enabled = false
- }
-}
-
diff --git a/spark/v3.2/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4 b/spark/v3.2/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4
deleted file mode 100644
index 7bd556acc5aa..000000000000
--- a/spark/v3.2/spark-extensions/src/main/antlr/org.apache.spark.sql.catalyst.parser.extensions/IcebergSqlExtensions.g4
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
- * This file is an adaptation of Presto's and Spark's grammar files.
- */
-
-grammar IcebergSqlExtensions;
-
-@lexer::members {
- /**
- * Verify whether current token is a valid decimal token (which contains dot).
- * Returns true if the character that follows the token is not a digit or letter or underscore.
- *
- * For example:
- * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
- * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
- * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
- * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
- * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
- * which is not a digit or letter or underscore.
- */
- public boolean isValidDecimal() {
- int nextChar = _input.LA(1);
- if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
- nextChar == '_') {
- return false;
- } else {
- return true;
- }
- }
-
- /**
- * This method will be called when we see '/*' and try to match it as a bracketed comment.
- * If the next character is '+', it should be parsed as hint later, and we cannot match
- * it as a bracketed comment.
- *
- * Returns true if the next character is '+'.
- */
- public boolean isHint() {
- int nextChar = _input.LA(1);
- if (nextChar == '+') {
- return true;
- } else {
- return false;
- }
- }
-}
-
-singleStatement
- : statement EOF
- ;
-
-statement
- : CALL multipartIdentifier '(' (callArgument (',' callArgument)*)? ')' #call
- | ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS name=identifier)? #addPartitionField
- | ALTER TABLE multipartIdentifier DROP PARTITION FIELD transform #dropPartitionField
- | ALTER TABLE multipartIdentifier REPLACE PARTITION FIELD transform WITH transform (AS name=identifier)? #replacePartitionField
- | ALTER TABLE multipartIdentifier WRITE writeSpec #setWriteDistributionAndOrdering
- | ALTER TABLE multipartIdentifier SET IDENTIFIER_KW FIELDS fieldList #setIdentifierFields
- | ALTER TABLE multipartIdentifier DROP IDENTIFIER_KW FIELDS fieldList #dropIdentifierFields
- | ALTER TABLE multipartIdentifier createReplaceBranchClause #createOrReplaceBranch
- | ALTER TABLE multipartIdentifier createReplaceTagClause #createOrReplaceTag
- | ALTER TABLE multipartIdentifier DROP BRANCH (IF EXISTS)? identifier #dropBranch
- | ALTER TABLE multipartIdentifier DROP TAG (IF EXISTS)? identifier #dropTag
- ;
-
-createReplaceTagClause
- : (CREATE OR)? REPLACE TAG identifier tagOptions
- | CREATE TAG (IF NOT EXISTS)? identifier tagOptions
- ;
-
-createReplaceBranchClause
- : (CREATE OR)? REPLACE BRANCH identifier branchOptions
- | CREATE BRANCH (IF NOT EXISTS)? identifier branchOptions
- ;
-
-tagOptions
- : (AS OF VERSION snapshotId)? (refRetain)?
- ;
-
-branchOptions
- : (AS OF VERSION snapshotId)? (refRetain)? (snapshotRetention)?
- ;
-
-snapshotRetention
- : WITH SNAPSHOT RETENTION minSnapshotsToKeep
- | WITH SNAPSHOT RETENTION maxSnapshotAge
- | WITH SNAPSHOT RETENTION minSnapshotsToKeep maxSnapshotAge
- ;
-
-refRetain
- : RETAIN number timeUnit
- ;
-
-maxSnapshotAge
- : number timeUnit
- ;
-
-minSnapshotsToKeep
- : number SNAPSHOTS
- ;
-
-writeSpec
- : (writeDistributionSpec | writeOrderingSpec)*
- ;
-
-writeDistributionSpec
- : DISTRIBUTED BY PARTITION
- ;
-
-writeOrderingSpec
- : LOCALLY? ORDERED BY order
- | UNORDERED
- ;
-
-callArgument
- : expression #positionalArgument
- | identifier '=>' expression #namedArgument
- ;
-
-singleOrder
- : order EOF
- ;
-
-order
- : fields+=orderField (',' fields+=orderField)*
- | '(' fields+=orderField (',' fields+=orderField)* ')'
- ;
-
-orderField
- : transform direction=(ASC | DESC)? (NULLS nullOrder=(FIRST | LAST))?
- ;
-
-transform
- : multipartIdentifier #identityTransform
- | transformName=identifier
- '(' arguments+=transformArgument (',' arguments+=transformArgument)* ')' #applyTransform
- ;
-
-transformArgument
- : multipartIdentifier
- | constant
- ;
-
-expression
- : constant
- | stringMap
- | stringArray
- ;
-
-constant
- : number #numericLiteral
- | booleanValue #booleanLiteral
- | STRING+ #stringLiteral
- | identifier STRING #typeConstructor
- ;
-
-stringMap
- : MAP '(' constant (',' constant)* ')'
- ;
-
-stringArray
- : ARRAY '(' constant (',' constant)* ')'
- ;
-
-booleanValue
- : TRUE | FALSE
- ;
-
-number
- : MINUS? EXPONENT_VALUE #exponentLiteral
- | MINUS? DECIMAL_VALUE #decimalLiteral
- | MINUS? INTEGER_VALUE #integerLiteral
- | MINUS? BIGINT_LITERAL #bigIntLiteral
- | MINUS? SMALLINT_LITERAL #smallIntLiteral
- | MINUS? TINYINT_LITERAL #tinyIntLiteral
- | MINUS? DOUBLE_LITERAL #doubleLiteral
- | MINUS? FLOAT_LITERAL #floatLiteral
- | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
- ;
-
-multipartIdentifier
- : parts+=identifier ('.' parts+=identifier)*
- ;
-
-identifier
- : IDENTIFIER #unquotedIdentifier
- | quotedIdentifier #quotedIdentifierAlternative
- | nonReserved #unquotedIdentifier
- ;
-
-quotedIdentifier
- : BACKQUOTED_IDENTIFIER
- ;
-
-fieldList
- : fields+=multipartIdentifier (',' fields+=multipartIdentifier)*
- ;
-
-nonReserved
- : ADD | ALTER | AS | ASC | BRANCH | BY | CALL | CREATE | DAYS | DESC | DROP | EXISTS | FIELD | FIRST | HOURS | IF | LAST | NOT | NULLS | OF | OR | ORDERED | PARTITION | TABLE | WRITE
- | DISTRIBUTED | LOCALLY | MINUTES | MONTHS | UNORDERED | REPLACE | RETAIN | VERSION | WITH | IDENTIFIER_KW | FIELDS | SET | SNAPSHOT | SNAPSHOTS
- | TAG | TRUE | FALSE
- | MAP
- ;
-
-snapshotId
- : number
- ;
-
-numSnapshots
- : number
- ;
-
-timeUnit
- : DAYS
- | HOURS
- | MINUTES
- ;
-
-ADD: 'ADD';
-ALTER: 'ALTER';
-AS: 'AS';
-ASC: 'ASC';
-BRANCH: 'BRANCH';
-BY: 'BY';
-CALL: 'CALL';
-CREATE: 'CREATE';
-DAYS: 'DAYS';
-DESC: 'DESC';
-DISTRIBUTED: 'DISTRIBUTED';
-DROP: 'DROP';
-EXISTS: 'EXISTS';
-FIELD: 'FIELD';
-FIELDS: 'FIELDS';
-FIRST: 'FIRST';
-HOURS: 'HOURS';
-IF : 'IF';
-LAST: 'LAST';
-LOCALLY: 'LOCALLY';
-MINUTES: 'MINUTES';
-MONTHS: 'MONTHS';
-NOT: 'NOT';
-NULLS: 'NULLS';
-OF: 'OF';
-OR: 'OR';
-ORDERED: 'ORDERED';
-PARTITION: 'PARTITION';
-REPLACE: 'REPLACE';
-RETAIN: 'RETAIN';
-RETENTION: 'RETENTION';
-IDENTIFIER_KW: 'IDENTIFIER';
-SET: 'SET';
-SNAPSHOT: 'SNAPSHOT';
-SNAPSHOTS: 'SNAPSHOTS';
-TABLE: 'TABLE';
-TAG: 'TAG';
-UNORDERED: 'UNORDERED';
-VERSION: 'VERSION';
-WITH: 'WITH';
-WRITE: 'WRITE';
-
-TRUE: 'TRUE';
-FALSE: 'FALSE';
-
-MAP: 'MAP';
-ARRAY: 'ARRAY';
-
-PLUS: '+';
-MINUS: '-';
-
-STRING
- : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
- | '"' ( ~('"'|'\\') | ('\\' .) )* '"'
- ;
-
-BIGINT_LITERAL
- : DIGIT+ 'L'
- ;
-
-SMALLINT_LITERAL
- : DIGIT+ 'S'
- ;
-
-TINYINT_LITERAL
- : DIGIT+ 'Y'
- ;
-
-INTEGER_VALUE
- : DIGIT+
- ;
-
-EXPONENT_VALUE
- : DIGIT+ EXPONENT
- | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
- ;
-
-DECIMAL_VALUE
- : DECIMAL_DIGITS {isValidDecimal()}?
- ;
-
-FLOAT_LITERAL
- : DIGIT+ EXPONENT? 'F'
- | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
- ;
-
-DOUBLE_LITERAL
- : DIGIT+ EXPONENT? 'D'
- | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
- ;
-
-BIGDECIMAL_LITERAL
- : DIGIT+ EXPONENT? 'BD'
- | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
- ;
-
-IDENTIFIER
- : (LETTER | DIGIT | '_')+
- ;
-
-BACKQUOTED_IDENTIFIER
- : '`' ( ~'`' | '``' )* '`'
- ;
-
-fragment DECIMAL_DIGITS
- : DIGIT+ '.' DIGIT*
- | '.' DIGIT+
- ;
-
-fragment EXPONENT
- : 'E' [+-]? DIGIT+
- ;
-
-fragment DIGIT
- : [0-9]
- ;
-
-fragment LETTER
- : [A-Z]
- ;
-
-SIMPLE_COMMENT
- : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
- ;
-
-BRACKETED_COMMENT
- : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
- ;
-
-WS
- : [ \r\n\t]+ -> channel(HIDDEN)
- ;
-
-// Catch-all for anything we can't recognize.
-// We use this to be able to ignore and recover all the text
-// when splitting statements with DelimiterLexer
-UNRECOGNIZED
- : .
- ;
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala
deleted file mode 100644
index 455129f2c9d5..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/iceberg/spark/extensions/IcebergSparkSessionExtensions.scala
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.iceberg.spark.extensions
-
-import org.apache.spark.sql.SparkSessionExtensions
-import org.apache.spark.sql.catalyst.analysis.AlignedRowLevelIcebergCommandCheck
-import org.apache.spark.sql.catalyst.analysis.AlignRowLevelCommandAssignments
-import org.apache.spark.sql.catalyst.analysis.CheckMergeIntoTableConditions
-import org.apache.spark.sql.catalyst.analysis.MergeIntoIcebergTableResolutionCheck
-import org.apache.spark.sql.catalyst.analysis.ProcedureArgumentCoercion
-import org.apache.spark.sql.catalyst.analysis.ResolveMergeIntoTableReferences
-import org.apache.spark.sql.catalyst.analysis.ResolveProcedures
-import org.apache.spark.sql.catalyst.analysis.RewriteDeleteFromTable
-import org.apache.spark.sql.catalyst.analysis.RewriteMergeIntoTable
-import org.apache.spark.sql.catalyst.analysis.RewriteUpdateTable
-import org.apache.spark.sql.catalyst.optimizer.ExtendedReplaceNullWithFalseInPredicate
-import org.apache.spark.sql.catalyst.optimizer.ExtendedSimplifyConditionalsInPredicate
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser
-import org.apache.spark.sql.execution.datasources.v2.ExtendedDataSourceV2Strategy
-import org.apache.spark.sql.execution.datasources.v2.ExtendedV2Writes
-import org.apache.spark.sql.execution.datasources.v2.OptimizeMetadataOnlyDeleteFromTable
-import org.apache.spark.sql.execution.datasources.v2.ReplaceRewrittenRowLevelCommand
-import org.apache.spark.sql.execution.datasources.v2.RowLevelCommandScanRelationPushDown
-import org.apache.spark.sql.execution.dynamicpruning.RowLevelCommandDynamicPruning
-
-class IcebergSparkSessionExtensions extends (SparkSessionExtensions => Unit) {
-
- override def apply(extensions: SparkSessionExtensions): Unit = {
- // parser extensions
- extensions.injectParser { case (_, parser) => new IcebergSparkSqlExtensionsParser(parser) }
-
- // analyzer extensions
- extensions.injectResolutionRule { spark => ResolveProcedures(spark) }
- extensions.injectResolutionRule { spark => ResolveMergeIntoTableReferences(spark) }
- extensions.injectResolutionRule { _ => CheckMergeIntoTableConditions }
- extensions.injectResolutionRule { _ => ProcedureArgumentCoercion }
- extensions.injectResolutionRule { _ => AlignRowLevelCommandAssignments }
- extensions.injectResolutionRule { _ => RewriteDeleteFromTable }
- extensions.injectResolutionRule { _ => RewriteUpdateTable }
- extensions.injectResolutionRule { _ => RewriteMergeIntoTable }
- extensions.injectCheckRule { _ => MergeIntoIcebergTableResolutionCheck }
- extensions.injectCheckRule { _ => AlignedRowLevelIcebergCommandCheck }
-
- // optimizer extensions
- extensions.injectOptimizerRule { _ => ExtendedSimplifyConditionalsInPredicate }
- extensions.injectOptimizerRule { _ => ExtendedReplaceNullWithFalseInPredicate }
- // pre-CBO rules run only once and the order of the rules is important
- // - metadata deletes have to be attempted immediately after the operator optimization
- // - dynamic filters should be added before replacing commands with rewrite plans
- // - scans must be planned before building writes
- extensions.injectPreCBORule { _ => OptimizeMetadataOnlyDeleteFromTable }
- extensions.injectPreCBORule { _ => RowLevelCommandScanRelationPushDown }
- extensions.injectPreCBORule { _ => ExtendedV2Writes }
- extensions.injectPreCBORule { spark => RowLevelCommandDynamicPruning(spark) }
- extensions.injectPreCBORule { _ => ReplaceRewrittenRowLevelCommand }
-
- // planner extensions
- extensions.injectPlannerStrategy { spark => ExtendedDataSourceV2Strategy(spark) }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala
deleted file mode 100644
index fb654b646738..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.catalyst.util.ArrayData
-import org.apache.spark.sql.catalyst.util.MapData
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.Decimal
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.unsafe.types.CalendarInterval
-import org.apache.spark.unsafe.types.UTF8String
-
-/**
- * An InternalRow that projects particular columns from another InternalRow without copying
- * the underlying data.
- */
-case class ProjectingInternalRow(schema: StructType, colOrdinals: Seq[Int]) extends InternalRow {
- assert(schema.size == colOrdinals.size)
-
- private var row: InternalRow = _
-
- override def numFields: Int = colOrdinals.size
-
- def project(row: InternalRow): Unit = {
- this.row = row
- }
-
- override def setNullAt(i: Int): Unit = {
- throw new UnsupportedOperationException("Cannot modify InternalRowProjection")
- }
-
- override def update(i: Int, value: Any): Unit = {
- throw new UnsupportedOperationException("Cannot modify InternalRowProjection")
- }
-
- override def copy(): InternalRow = {
- val newRow = if (row != null) row.copy() else null
- val newProjection = ProjectingInternalRow(schema, colOrdinals)
- newProjection.project(newRow)
- newProjection
- }
-
- override def isNullAt(ordinal: Int): Boolean = {
- row.isNullAt(colOrdinals(ordinal))
- }
-
- override def getBoolean(ordinal: Int): Boolean = {
- row.getBoolean(colOrdinals(ordinal))
- }
-
- override def getByte(ordinal: Int): Byte = {
- row.getByte(colOrdinals(ordinal))
- }
-
- override def getShort(ordinal: Int): Short = {
- row.getShort(colOrdinals(ordinal))
- }
-
- override def getInt(ordinal: Int): Int = {
- row.getInt(colOrdinals(ordinal))
- }
-
- override def getLong(ordinal: Int): Long = {
- row.getLong(colOrdinals(ordinal))
- }
-
- override def getFloat(ordinal: Int): Float = {
- row.getFloat(colOrdinals(ordinal))
- }
-
- override def getDouble(ordinal: Int): Double = {
- row.getDouble(colOrdinals(ordinal))
- }
-
- override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = {
- row.getDecimal(colOrdinals(ordinal), precision, scale)
- }
-
- override def getUTF8String(ordinal: Int): UTF8String = {
- row.getUTF8String(colOrdinals(ordinal))
- }
-
- override def getBinary(ordinal: Int): Array[Byte] = {
- row.getBinary(colOrdinals(ordinal))
- }
-
- override def getInterval(ordinal: Int): CalendarInterval = {
- row.getInterval(colOrdinals(ordinal))
- }
-
- override def getStruct(ordinal: Int, numFields: Int): InternalRow = {
- row.getStruct(colOrdinals(ordinal), numFields)
- }
-
- override def getArray(ordinal: Int): ArrayData = {
- row.getArray(colOrdinals(ordinal))
- }
-
- override def getMap(ordinal: Int): MapData = {
- row.getMap(colOrdinals(ordinal))
- }
-
- override def get(ordinal: Int, dataType: DataType): AnyRef = {
- row.get(colOrdinals(ordinal), dataType)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala
deleted file mode 100644
index ad416f2a1c63..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignRowLevelCommandAssignments.scala
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A rule that aligns assignments in UPDATE and MERGE operations.
- *
- * Note that this rule must be run before rewriting row-level commands.
- */
-object AlignRowLevelCommandAssignments
- extends Rule[LogicalPlan] with AssignmentAlignmentSupport {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case u: UpdateIcebergTable if u.resolved && !u.aligned =>
- u.copy(assignments = alignAssignments(u.table, u.assignments))
-
- case m: MergeIntoIcebergTable if m.resolved && !m.aligned =>
- val alignedMatchedActions = m.matchedActions.map {
- case u @ UpdateAction(_, assignments) =>
- u.copy(assignments = alignAssignments(m.targetTable, assignments))
- case d: DeleteAction =>
- d
- case _ =>
- throw new AnalysisException("Matched actions can only contain UPDATE or DELETE")
- }
-
- val alignedNotMatchedActions = m.notMatchedActions.map {
- case i @ InsertAction(_, assignments) =>
- // check no nested columns are present
- val refs = assignments.map(_.key).map(AssignmentUtils.toAssignmentRef)
- refs.foreach { ref =>
- if (ref.size > 1) {
- throw new AnalysisException(
- "Nested fields are not supported inside INSERT clauses of MERGE operations: " +
- s"${ref.mkString("`", "`.`", "`")}")
- }
- }
-
- val colNames = refs.map(_.head)
-
- // check there are no duplicates
- val duplicateColNames = colNames.groupBy(identity).collect {
- case (name, matchingNames) if matchingNames.size > 1 => name
- }
-
- if (duplicateColNames.nonEmpty) {
- throw new AnalysisException(
- s"Duplicate column names inside INSERT clause: ${duplicateColNames.mkString(", ")}")
- }
-
- // reorder assignments by the target table column order
- val assignmentMap = colNames.zip(assignments).toMap
- i.copy(assignments = alignInsertActionAssignments(m.targetTable, assignmentMap))
-
- case _ =>
- throw new AnalysisException("Not matched actions can only contain INSERT")
- }
-
- m.copy(matchedActions = alignedMatchedActions, notMatchedActions = alignedNotMatchedActions)
- }
-
- private def alignInsertActionAssignments(
- targetTable: LogicalPlan,
- assignmentMap: Map[String, Assignment]): Seq[Assignment] = {
-
- val resolver = conf.resolver
-
- targetTable.output.map { targetAttr =>
- val assignment = assignmentMap
- .find { case (name, _) => resolver(name, targetAttr.name) }
- .map { case (_, assignment) => assignment }
-
- if (assignment.isEmpty) {
- throw new AnalysisException(
- s"Cannot find column '${targetAttr.name}' of the target table among " +
- s"the INSERT columns: ${assignmentMap.keys.mkString(", ")}. " +
- "INSERT clauses must provide values for all columns of the target table.")
- }
-
- val key = assignment.get.key
- val value = castIfNeeded(targetAttr, assignment.get.value, resolver)
- AssignmentUtils.handleCharVarcharLimits(Assignment(key, value))
- }
- }
-}
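The rule removed above reorders INSERT assignments in a MERGE so they follow the target table's column order, after rejecting nested fields and duplicate column names. A standalone sketch of just the reordering step, under hypothetical names and with plain strings standing in for Catalyst expressions:

    object InsertAlignmentSketch {
      // Reorders column -> value assignments into the target column order,
      // failing when a target column has no assignment (mirrors the error above).
      def align(targetCols: Seq[String], assignments: Map[String, String]): Seq[(String, String)] =
        targetCols.map { col =>
          assignments.get(col) match {
            case Some(value) => col -> value
            case None =>
              throw new IllegalArgumentException(
                s"Cannot find column '$col' among INSERT columns: ${assignments.keys.mkString(", ")}")
          }
        }

      def main(args: Array[String]): Unit = {
        // INSERT listed columns as (name, id); the target order is (id, name).
        println(align(Seq("id", "name"), Map("name" -> "s.name", "id" -> "s.id")))
        // List((id,s.id), (name,s.name))
      }
    }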
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala
deleted file mode 100644
index d915e4f10949..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlignedRowLevelIcebergCommandCheck.scala
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-
-object AlignedRowLevelIcebergCommandCheck extends (LogicalPlan => Unit) {
-
- override def apply(plan: LogicalPlan): Unit = {
- plan foreach {
- case m: MergeIntoIcebergTable if !m.aligned =>
- throw new AnalysisException(s"Could not align Iceberg MERGE INTO: $m")
- case u: UpdateIcebergTable if !u.aligned =>
- throw new AnalysisException(s"Could not align Iceberg UPDATE: $u")
- case _ => // OK
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala
deleted file mode 100644
index 14115bd3cbfe..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentSupport.scala
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.AnsiCast
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils._
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.catalyst.expressions.CreateNamedStruct
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.GetStructField
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.StructField
-import org.apache.spark.sql.types.StructType
-import scala.collection.compat.immutable.ArraySeq
-import scala.collection.mutable
-
-trait AssignmentAlignmentSupport extends CastSupport {
-
- self: SQLConfHelper =>
-
- private case class ColumnUpdate(ref: Seq[String], expr: Expression)
-
- /**
- * Aligns assignments to match table columns.
- *
- * This method processes and reorders given assignments so that each target column gets
- * an expression it should be set to. If a column does not have a matching assignment,
- * it will be set to its current value. For example, if one passes a table with columns c1, c2
- * and an assignment c2 = 1, this method will return c1 = c1, c2 = 1.
- *
- * This method also handles updates to nested columns. If there is an assignment to a particular
- * nested field, this method will construct a new struct with one field updated
- * preserving other fields that have not been modified. For example, if one passes a table with
- * columns c1, c2 where c2 is a struct with fields n1 and n2 and an assignment c2.n2 = 1,
- * this method will return c1 = c1, c2 = struct(c2.n1, 1).
- *
- * @param table a target table
- * @param assignments assignments to align
- * @return aligned assignments that match table columns
- */
- protected def alignAssignments(
- table: LogicalPlan,
- assignments: Seq[Assignment]): Seq[Assignment] = {
-
- val columnUpdates = assignments.map(a => ColumnUpdate(toAssignmentRef(a.key), a.value))
- val outputExprs = applyUpdates(table.output, columnUpdates)
- outputExprs.zip(table.output).map {
- case (expr, attr) => handleCharVarcharLimits(Assignment(attr, expr))
- }
- }
-
- private def applyUpdates(
- cols: Seq[NamedExpression],
- updates: Seq[ColumnUpdate],
- resolver: Resolver = conf.resolver,
- namePrefix: Seq[String] = Nil): Seq[Expression] = {
-
- // iterate through columns at the current level and find which column updates match
- cols.map { col =>
- // find matches for this column or any of its children
- val prefixMatchedUpdates = updates.filter(a => resolver(a.ref.head, col.name))
- prefixMatchedUpdates match {
- // if there is no exact match and no match for children, return the column as is
- case updates if updates.isEmpty =>
- col
-
- // if there is an exact match, return the assigned expression
- case Seq(update) if isExactMatch(update, col, resolver) =>
- castIfNeeded(col, update.expr, resolver)
-
- // if there are matches only for children
- case updates if !hasExactMatch(updates, col, resolver) =>
- col.dataType match {
- case StructType(fields) =>
- // build field expressions
- val fieldExprs = fields.zipWithIndex.map { case (field, ordinal) =>
- Alias(GetStructField(col, ordinal, Some(field.name)), field.name)()
- }
-
- // recursively apply this method on nested fields
- val newUpdates = updates.map(u => u.copy(ref = u.ref.tail))
- val updatedFieldExprs = applyUpdates(
- ArraySeq.unsafeWrapArray(fieldExprs),
- newUpdates,
- resolver,
- namePrefix :+ col.name)
-
- // construct a new struct with updated field expressions
- toNamedStruct(ArraySeq.unsafeWrapArray(fields), updatedFieldExprs)
-
- case otherType =>
- val colName = (namePrefix :+ col.name).mkString(".")
- throw new AnalysisException(
- "Updating nested fields is only supported for StructType " +
- s"but $colName is of type $otherType"
- )
- }
-
- // if there are conflicting updates, throw an exception
- // there are two illegal scenarios:
- // - multiple updates to the same column
- // - updates to a top-level struct and its nested fields (e.g., a.b and a.b.c)
- case updates if hasExactMatch(updates, col, resolver) =>
- val conflictingCols = updates.map(u => (namePrefix ++ u.ref).mkString("."))
- throw new AnalysisException(
- "Updates are in conflict for these columns: " +
- conflictingCols.distinct.mkString(", "))
- }
- }
- }
-
- private def toNamedStruct(fields: Seq[StructField], fieldExprs: Seq[Expression]): Expression = {
- val namedStructExprs = fields.zip(fieldExprs).flatMap { case (field, expr) =>
- Seq(Literal(field.name), expr)
- }
- CreateNamedStruct(namedStructExprs)
- }
-
- private def hasExactMatch(
- updates: Seq[ColumnUpdate],
- col: NamedExpression,
- resolver: Resolver): Boolean = {
-
- updates.exists(assignment => isExactMatch(assignment, col, resolver))
- }
-
- private def isExactMatch(
- update: ColumnUpdate,
- col: NamedExpression,
- resolver: Resolver): Boolean = {
-
- update.ref match {
- case Seq(namePart) if resolver(namePart, col.name) => true
- case _ => false
- }
- }
-
- protected def castIfNeeded(
- tableAttr: NamedExpression,
- expr: Expression,
- resolver: Resolver): Expression = {
-
- val storeAssignmentPolicy = conf.storeAssignmentPolicy
-
- // run the type check and catch type errors
- storeAssignmentPolicy match {
- case StoreAssignmentPolicy.STRICT | StoreAssignmentPolicy.ANSI =>
- if (expr.nullable && !tableAttr.nullable) {
- throw new AnalysisException(
- s"Cannot write nullable values to non-null column '${tableAttr.name}'")
- }
-
- // use byName = true to catch cases when struct field names don't match
- // e.g. a struct with fields (a, b) is assigned as a struct with fields (a, c) or (b, a)
- val errors = new mutable.ArrayBuffer[String]()
- val canWrite = DataType.canWrite(
- expr.dataType, tableAttr.dataType, byName = true, resolver, tableAttr.name,
- storeAssignmentPolicy, err => errors += err)
-
- if (!canWrite) {
- throw new AnalysisException(
- s"Cannot write incompatible data:\n- ${errors.mkString("\n- ")}")
- }
-
- case _ => // OK
- }
-
- storeAssignmentPolicy match {
- case _ if tableAttr.dataType.sameType(expr.dataType) =>
- expr
- case StoreAssignmentPolicy.ANSI =>
- AnsiCast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone))
- case _ =>
- Cast(expr, tableAttr.dataType, Option(conf.sessionLocalTimeZone))
- }
- }
-}
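The trait removed above aligns UPDATE assignments against the full table schema: columns without an assignment keep their current value, and an assignment to a nested field rebuilds the enclosing struct with the untouched fields preserved. A toy sketch of that behavior matching the example in the scaladoc; the ADT and names are hypothetical and no Catalyst expressions are involved.

    object AssignmentAlignmentSketch {
      sealed trait Col
      final case class Leaf(name: String) extends Col
      final case class Struct(name: String, fields: Seq[Col]) extends Col

      // Returns, for each column, the expression it should be set to:
      // either the assigned value or a reference to its current value.
      def align(cols: Seq[Col], updates: Map[Seq[String], String], prefix: Seq[String] = Nil): Seq[String] =
        cols.map {
          case Leaf(name) =>
            updates.getOrElse(prefix :+ name, (prefix :+ name).mkString("."))
          case Struct(name, fields) =>
            updates.getOrElse(
              prefix :+ name,
              align(fields, updates, prefix :+ name).mkString("struct(", ", ", ")"))
        }

      def main(args: Array[String]): Unit = {
        val schema = Seq(Leaf("c1"), Struct("c2", Seq(Leaf("n1"), Leaf("n2"))))
        println(align(schema, Map(Seq("c2", "n2") -> "1")))
        // List(c1, struct(c2.n1, 1))  -- the c1 = c1, c2 = struct(c2.n1, 1) case from the scaladoc
      }
    }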
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala
deleted file mode 100644
index 70f6694af60b..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckMergeIntoTableConditions.scala
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A rule that checks MERGE operations contain only supported conditions.
- *
- * Note that this rule must be run in the resolution batch before Spark executes CheckAnalysis.
- * Otherwise, CheckAnalysis will throw a less descriptive error.
- */
-object CheckMergeIntoTableConditions extends Rule[LogicalPlan] {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case m: MergeIntoIcebergTable if m.resolved =>
- checkMergeIntoCondition("SEARCH", m.mergeCondition)
-
- val actions = m.matchedActions ++ m.notMatchedActions
- actions.foreach {
- case DeleteAction(Some(cond)) => checkMergeIntoCondition("DELETE", cond)
- case UpdateAction(Some(cond), _) => checkMergeIntoCondition("UPDATE", cond)
- case InsertAction(Some(cond), _) => checkMergeIntoCondition("INSERT", cond)
- case _ => // OK
- }
-
- m
- }
-
- private def checkMergeIntoCondition(condName: String, cond: Expression): Unit = {
- if (!cond.deterministic) {
- throw new AnalysisException(
- s"Non-deterministic functions are not supported in $condName conditions of " +
- s"MERGE operations: ${cond.sql}")
- }
-
- if (SubqueryExpression.hasSubquery(cond)) {
- throw new AnalysisException(
- s"Subqueries are not supported in conditions of MERGE operations. " +
- s"Found a subquery in the $condName condition: ${cond.sql}")
- }
-
- if (cond.find(_.isInstanceOf[AggregateExpression]).isDefined) {
- throw new AnalysisException(
- s"Agg functions are not supported in $condName conditions of MERGE operations: " + {cond.sql})
- }
- }
-}
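The deleted check rejects MERGE conditions that are non-deterministic, contain subqueries, or contain aggregate functions. A toy sketch of the same kind of traversal over a miniature expression tree; the ADT below is hypothetical and only covers the non-deterministic and aggregate cases, it is not Catalyst.

    object MergeConditionCheckSketch {
      sealed trait Expr { def children: Seq[Expr] = Nil }
      final case class ColRef(name: String) extends Expr
      final case class EqualTo(left: Expr, right: Expr) extends Expr {
        override def children: Seq[Expr] = Seq(left, right)
      }
      final case class Rand() extends Expr                 // stands in for a non-deterministic function
      final case class Max(child: Expr) extends Expr {     // stands in for an aggregate
        override def children: Seq[Expr] = Seq(child)
      }

      private def exists(e: Expr)(p: Expr => Boolean): Boolean =
        p(e) || e.children.exists(exists(_)(p))

      def check(condName: String, cond: Expr): Unit = {
        if (exists(cond)(_.isInstanceOf[Rand]))
          throw new IllegalArgumentException(s"Non-deterministic functions are not supported in $condName conditions")
        if (exists(cond)(_.isInstanceOf[Max]))
          throw new IllegalArgumentException(s"Agg functions are not supported in $condName conditions")
      }

      def main(args: Array[String]): Unit = {
        check("SEARCH", EqualTo(ColRef("t.id"), ColRef("s.id")))           // passes
        try check("UPDATE", EqualTo(Max(ColRef("t.id")), ColRef("s.id")))  // rejected
        catch { case e: IllegalArgumentException => println(e.getMessage) }
      }
    }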
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala
deleted file mode 100644
index b3a9bda280d2..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/MergeIntoIcebergTableResolutionCheck.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-
-object MergeIntoIcebergTableResolutionCheck extends (LogicalPlan => Unit) {
-
- override def apply(plan: LogicalPlan): Unit = {
- plan foreach {
- case m: UnresolvedMergeIntoIcebergTable =>
- throw new AnalysisException(s"Could not resolve Iceberg MERGE INTO statement: $m")
- case _ => // OK
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala
deleted file mode 100644
index 7f0ca8fadded..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ProcedureArgumentCoercion.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-
-object ProcedureArgumentCoercion extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case c @ Call(procedure, args) if c.resolved =>
- val params = procedure.parameters
-
- val newArgs = args.zipWithIndex.map { case (arg, index) =>
- val param = params(index)
- val paramType = param.dataType
- val argType = arg.dataType
-
- if (paramType != argType && !Cast.canUpCast(argType, paramType)) {
- throw new AnalysisException(
- s"Wrong arg type for ${param.name}: cannot cast $argType to $paramType")
- }
-
- if (paramType != argType) {
- Cast(arg, paramType)
- } else {
- arg
- }
- }
-
- if (newArgs != args) {
- c.copy(args = newArgs)
- } else {
- c
- }
- }
-}
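The rule removed above inserts casts for CALL arguments whose types differ from the declared parameter types, but only when the cast is a safe up-cast. A small sketch of that decision under a hypothetical type lattice; the real rule relies on Catalyst's Cast.canUpCast rather than the toy table below.

    object ProcedureCoercionSketch {
      sealed trait TypeTag
      case object IntT extends TypeTag
      case object LongT extends TypeTag
      case object StringT extends TypeTag

      // Toy stand-in for the "safe up-cast" check: only Int -> Long is allowed here.
      private def canUpCast(from: TypeTag, to: TypeTag): Boolean = from == IntT && to == LongT

      def coerce(paramName: String, paramType: TypeTag, argType: TypeTag): String =
        if (paramType == argType) "no cast needed"
        else if (canUpCast(argType, paramType)) s"cast($argType as $paramType)"
        else throw new IllegalArgumentException(
          s"Wrong arg type for $paramName: cannot cast $argType to $paramType")

      def main(args: Array[String]): Unit = {
        println(coerce("snapshot_id", LongT, IntT))   // cast(IntT as LongT)
        try println(coerce("table", StringT, IntT))   // rejected
        catch { case e: IllegalArgumentException => println(e.getMessage) }
      }
    }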
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala
deleted file mode 100644
index 63ebdef95730..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMergeIntoTableReferences.scala
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertStarAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateStarAction
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * A resolution rule similar to ResolveReferences in Spark but handles Iceberg MERGE operations.
- */
-case class ResolveMergeIntoTableReferences(spark: SparkSession) extends Rule[LogicalPlan] {
-
- private lazy val analyzer: Analyzer = spark.sessionState.analyzer
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
- case m @ UnresolvedMergeIntoIcebergTable(targetTable, sourceTable, context)
- if targetTable.resolved && sourceTable.resolved && m.duplicateResolved =>
-
- val resolvedMatchedActions = context.matchedActions.map {
- case DeleteAction(cond) =>
- val resolvedCond = cond.map(resolveCond("DELETE", _, m))
- DeleteAction(resolvedCond)
-
- case UpdateAction(cond, assignments) =>
- val resolvedCond = cond.map(resolveCond("UPDATE", _, m))
- // the update action can access columns from both target and source tables
- val resolvedAssignments = resolveAssignments(assignments, m, resolveValuesWithSourceOnly = false)
- UpdateAction(resolvedCond, resolvedAssignments)
-
- case UpdateStarAction(updateCondition) =>
- val resolvedUpdateCondition = updateCondition.map(resolveCond("UPDATE", _, m))
- val assignments = targetTable.output.map { attr =>
- Assignment(attr, UnresolvedAttribute(Seq(attr.name)))
- }
- // for UPDATE *, the value must be from the source table
- val resolvedAssignments = resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- UpdateAction(resolvedUpdateCondition, resolvedAssignments)
-
- case _ =>
- throw new AnalysisException("Matched actions can only contain UPDATE or DELETE")
- }
-
- val resolvedNotMatchedActions = context.notMatchedActions.map {
- case InsertAction(cond, assignments) =>
- // the insert action is used when not matched, so its condition and value can only
- // access columns from the source table
- val resolvedCond = cond.map(resolveCond("INSERT", _, Project(Nil, m.sourceTable)))
- val resolvedAssignments = resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- InsertAction(resolvedCond, resolvedAssignments)
-
- case InsertStarAction(cond) =>
- // the insert action is used when not matched, so its condition and value can only
- // access columns from the source table
- val resolvedCond = cond.map(resolveCond("INSERT", _, Project(Nil, m.sourceTable)))
- val assignments = targetTable.output.map { attr =>
- Assignment(attr, UnresolvedAttribute(Seq(attr.name)))
- }
- val resolvedAssignments = resolveAssignments(assignments, m, resolveValuesWithSourceOnly = true)
- InsertAction(resolvedCond, resolvedAssignments)
-
- case _ =>
- throw new AnalysisException("Not matched actions can only contain INSERT")
- }
-
- val resolvedMergeCondition = resolveCond("SEARCH", context.mergeCondition, m)
-
- MergeIntoIcebergTable(
- targetTable,
- sourceTable,
- mergeCondition = resolvedMergeCondition,
- matchedActions = resolvedMatchedActions,
- notMatchedActions = resolvedNotMatchedActions)
- }
-
- private def resolveCond(condName: String, cond: Expression, plan: LogicalPlan): Expression = {
- val resolvedCond = analyzer.resolveExpressionByPlanChildren(cond, plan)
-
- val unresolvedAttrs = resolvedCond.references.filter(!_.resolved)
- if (unresolvedAttrs.nonEmpty) {
- throw new AnalysisException(
- s"Cannot resolve ${unresolvedAttrs.map(_.sql).mkString("[", ",", "]")} in $condName condition " +
- s"of MERGE operation given input columns: ${plan.inputSet.toSeq.map(_.sql).mkString("[", ",", "]")}")
- }
-
- resolvedCond
- }
-
- // copied from ResolveReferences in Spark
- private def resolveAssignments(
- assignments: Seq[Assignment],
- mergeInto: UnresolvedMergeIntoIcebergTable,
- resolveValuesWithSourceOnly: Boolean): Seq[Assignment] = {
- assignments.map { assign =>
- val resolvedKey = assign.key match {
- case c if !c.resolved =>
- resolveMergeExprOrFail(c, Project(Nil, mergeInto.targetTable))
- case o => o
- }
- val resolvedValue = assign.value match {
- // The update values may contain target and/or source references.
- case c if !c.resolved =>
- if (resolveValuesWithSourceOnly) {
- resolveMergeExprOrFail(c, Project(Nil, mergeInto.sourceTable))
- } else {
- resolveMergeExprOrFail(c, mergeInto)
- }
- case o => o
- }
- Assignment(resolvedKey, resolvedValue)
- }
- }
-
- // copied from ResolveReferences in Spark
- private def resolveMergeExprOrFail(e: Expression, p: LogicalPlan): Expression = {
- val resolved = analyzer.resolveExpressionByPlanChildren(e, p)
- resolved.references.filter(!_.resolved).foreach { a =>
- // Note: This will throw error only on unresolved attribute issues,
- // not other resolution errors like mismatched data types.
- val cols = p.inputSet.toSeq.map(_.sql).mkString(", ")
- a.failAnalysis(s"cannot resolve ${a.sql} in MERGE command given columns [$cols]")
- }
- resolved
- }
-}
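The resolution rule removed above scopes each MERGE clause differently: matched UPDATE and DELETE clauses may reference target and source columns, while NOT MATCHED INSERT clauses may only reference the source. A plain sketch of that scoping; the names are hypothetical, and the real rule delegates to Spark's analyzer instead of matching strings.

    object MergeScopeSketch {
      def resolve(clause: String, refs: Seq[String], target: Set[String], source: Set[String]): Unit = {
        // INSERT sees only source columns; UPDATE/DELETE see both sides of the join.
        val visible = if (clause == "INSERT") source else target ++ source
        val unresolved = refs.filterNot(visible.contains)
        if (unresolved.nonEmpty)
          throw new IllegalArgumentException(
            s"Cannot resolve ${unresolved.mkString("[", ",", "]")} in $clause clause " +
              s"given input columns: ${visible.mkString("[", ",", "]")}")
      }

      def main(args: Array[String]): Unit = {
        val target = Set("t.id", "t.data")
        val source = Set("s.id", "s.data")
        resolve("UPDATE", Seq("t.data", "s.data"), target, source)    // ok: both sides visible
        try resolve("INSERT", Seq("t.data"), target, source)          // fails: target not visible
        catch { case e: IllegalArgumentException => println(e.getMessage) }
      }
    }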
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala
deleted file mode 100644
index ee69b5e344f0..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import java.util.Locale
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.CallArgument
-import org.apache.spark.sql.catalyst.plans.logical.CallStatement
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.NamedArgument
-import org.apache.spark.sql.catalyst.plans.logical.PositionalArgument
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.CatalogPlugin
-import org.apache.spark.sql.connector.catalog.LookupCatalog
-import org.apache.spark.sql.connector.iceberg.catalog.ProcedureCatalog
-import org.apache.spark.sql.connector.iceberg.catalog.ProcedureParameter
-import scala.collection.Seq
-
-case class ResolveProcedures(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog {
-
- protected lazy val catalogManager: CatalogManager = spark.sessionState.catalogManager
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case CallStatement(CatalogAndIdentifier(catalog, ident), args) =>
- val procedure = catalog.asProcedureCatalog.loadProcedure(ident)
-
- val params = procedure.parameters
- val normalizedParams = normalizeParams(params)
- validateParams(normalizedParams)
-
- val normalizedArgs = normalizeArgs(args)
- Call(procedure, args = buildArgExprs(normalizedParams, normalizedArgs).toSeq)
- }
-
- private def validateParams(params: Seq[ProcedureParameter]): Unit = {
- // should not be any duplicate param names
- val duplicateParamNames = params.groupBy(_.name).collect {
- case (name, matchingParams) if matchingParams.length > 1 => name
- }
-
- if (duplicateParamNames.nonEmpty) {
- throw new AnalysisException(s"Duplicate parameter names: ${duplicateParamNames.mkString("[", ",", "]")}")
- }
-
- // optional params should be at the end
- params.sliding(2).foreach {
- case Seq(previousParam, currentParam) if !previousParam.required && currentParam.required =>
- throw new AnalysisException(
- s"Optional parameters must be after required ones but $currentParam is after $previousParam")
- case _ =>
- }
- }
-
- private def buildArgExprs(
- params: Seq[ProcedureParameter],
- args: Seq[CallArgument]): Seq[Expression] = {
-
- // build a map of declared parameter names to their positions
- val nameToPositionMap = params.map(_.name).zipWithIndex.toMap
-
- // build a map of parameter names to args
- val nameToArgMap = buildNameToArgMap(params, args, nameToPositionMap)
-
- // verify all required parameters are provided
- val missingParamNames = params.filter(_.required).collect {
- case param if !nameToArgMap.contains(param.name) => param.name
- }
-
- if (missingParamNames.nonEmpty) {
- throw new AnalysisException(s"Missing required parameters: ${missingParamNames.mkString("[", ",", "]")}")
- }
-
- val argExprs = new Array[Expression](params.size)
-
- nameToArgMap.foreach { case (name, arg) =>
- val position = nameToPositionMap(name)
- argExprs(position) = arg.expr
- }
-
- // assign nulls to optional params that were not set
- params.foreach {
- case p if !p.required && !nameToArgMap.contains(p.name) =>
- val position = nameToPositionMap(p.name)
- argExprs(position) = Literal.create(null, p.dataType)
- case _ =>
- }
-
- argExprs
- }
-
- private def buildNameToArgMap(
- params: Seq[ProcedureParameter],
- args: Seq[CallArgument],
- nameToPositionMap: Map[String, Int]): Map[String, CallArgument] = {
-
- val containsNamedArg = args.exists(_.isInstanceOf[NamedArgument])
- val containsPositionalArg = args.exists(_.isInstanceOf[PositionalArgument])
-
- if (containsNamedArg && containsPositionalArg) {
- throw new AnalysisException("Named and positional arguments cannot be mixed")
- }
-
- if (containsNamedArg) {
- buildNameToArgMapUsingNames(args, nameToPositionMap)
- } else {
- buildNameToArgMapUsingPositions(args, params)
- }
- }
-
- private def buildNameToArgMapUsingNames(
- args: Seq[CallArgument],
- nameToPositionMap: Map[String, Int]): Map[String, CallArgument] = {
-
- val namedArgs = args.asInstanceOf[Seq[NamedArgument]]
-
- val validationErrors = namedArgs.groupBy(_.name).collect {
- case (name, matchingArgs) if matchingArgs.size > 1 => s"Duplicate procedure argument: $name"
- case (name, _) if !nameToPositionMap.contains(name) => s"Unknown argument: $name"
- }
-
- if (validationErrors.nonEmpty) {
- throw new AnalysisException(s"Could not build name to arg map: ${validationErrors.mkString(", ")}")
- }
-
- namedArgs.map(arg => arg.name -> arg).toMap
- }
-
- private def buildNameToArgMapUsingPositions(
- args: Seq[CallArgument],
- params: Seq[ProcedureParameter]): Map[String, CallArgument] = {
-
- if (args.size > params.size) {
- throw new AnalysisException("Too many arguments for procedure")
- }
-
- args.zipWithIndex.map { case (arg, position) =>
- val param = params(position)
- param.name -> arg
- }.toMap
- }
-
- private def normalizeParams(params: Seq[ProcedureParameter]): Seq[ProcedureParameter] = {
- params.map {
- case param if param.required =>
- val normalizedName = param.name.toLowerCase(Locale.ROOT)
- ProcedureParameter.required(normalizedName, param.dataType)
- case param =>
- val normalizedName = param.name.toLowerCase(Locale.ROOT)
- ProcedureParameter.optional(normalizedName, param.dataType)
- }
- }
-
- private def normalizeArgs(args: Seq[CallArgument]): Seq[CallArgument] = {
- args.map {
- case a @ NamedArgument(name, _) => a.copy(name = name.toLowerCase(Locale.ROOT))
- case other => other
- }
- }
-
- implicit class CatalogHelper(plugin: CatalogPlugin) {
- def asProcedureCatalog: ProcedureCatalog = plugin match {
- case procedureCatalog: ProcedureCatalog =>
- procedureCatalog
- case _ =>
- throw new AnalysisException(s"Cannot use catalog ${plugin.name}: not a ProcedureCatalog")
- }
- }
-}
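The deleted procedure-resolution rule binds CALL arguments either all by name or all by position (mixing is rejected), verifies required parameters are present, and leaves unset optional parameters to be filled with nulls. A compact sketch of the binding step under hypothetical names; unlike the real rule, it skips the duplicate-name and too-many-arguments checks.

    object CallArgBindingSketch {
      final case class Param(name: String, required: Boolean)
      sealed trait Arg
      final case class Named(name: String, value: String) extends Arg
      final case class Positional(value: String) extends Arg

      def bind(params: Seq[Param], args: Seq[Arg]): Map[String, Option[String]] = {
        val named = args.collect { case n: Named => n }
        val positional = args.collect { case p: Positional => p }
        if (named.nonEmpty && positional.nonEmpty)
          throw new IllegalArgumentException("Named and positional arguments cannot be mixed")

        val byName: Map[String, String] =
          if (named.nonEmpty) named.map(n => n.name -> n.value).toMap
          else params.map(_.name).zip(positional.map(_.value)).toMap

        val missing = params.filter(p => p.required && !byName.contains(p.name)).map(_.name)
        if (missing.nonEmpty)
          throw new IllegalArgumentException(s"Missing required parameters: ${missing.mkString("[", ",", "]")}")

        // unset optional parameters fall back to None (a typed null literal in the real rule)
        params.map(p => p.name -> byName.get(p.name)).toMap
      }

      def main(args: Array[String]): Unit = {
        val params = Seq(Param("table", required = true), Param("older_than", required = false))
        println(bind(params, Seq(Named("table", "'db.tbl'"))))
        // Map(table -> Some('db.tbl'), older_than -> None)
      }
    }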
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala
deleted file mode 100644
index bbf7828eb4b4..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.EqualNullSafe
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.connector.iceberg.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command.DELETE
-import org.apache.spark.sql.connector.iceberg.write.SupportsDelta
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-
-/**
- * Assigns a rewrite plan for v2 tables that support rewriting data to handle DELETE statements.
- *
- * If a table implements SupportsDelete and SupportsRowLevelOperations, this rule assigns a rewrite
- * plan but the optimizer will check whether this particular DELETE statement can be handled
- * by simply passing delete filters to the connector. If yes, the optimizer will then discard
- * the rewrite plan.
- */
-object RewriteDeleteFromTable extends RewriteRowLevelCommand {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case d @ DeleteFromIcebergTable(aliasedTable, Some(cond), None) if d.resolved =>
- EliminateSubqueryAliases(aliasedTable) match {
- case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) =>
- val operation = buildRowLevelOperation(tbl, DELETE)
- val table = RowLevelOperationTable(tbl, operation)
- val rewritePlan = operation match {
- case _: SupportsDelta =>
- buildWriteDeltaPlan(r, table, cond)
- case _ =>
- buildReplaceDataPlan(r, table, cond)
- }
- // keep the original relation in DELETE to try deleting using filters
- DeleteFromIcebergTable(r, Some(cond), Some(rewritePlan))
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- private def buildReplaceDataPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- cond: Expression): ReplaceData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs)
-
- // construct a plan that contains unmatched rows in matched groups that must be carried over
- // such rows do not match the condition but have to be copied over as the source can replace
- // only groups of rows
- val remainingRowsFilter = Not(EqualNullSafe(cond, Literal.TrueLiteral))
- val remainingRowsPlan = Filter(remainingRowsFilter, readRelation)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceData(writeRelation, remainingRowsPlan, relation)
- }
-
- // build a rewrite plan for sources that support row deltas
- private def buildWriteDeltaPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- cond: Expression): WriteDelta = {
-
- // resolve all needed attrs (e.g. row ID and any required metadata attrs)
- val rowIdAttrs = resolveRowIdAttrs(relation, operationTable.operation)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs, rowIdAttrs)
-
- // construct a plan that only contains records to delete
- val deletedRowsPlan = Filter(cond, readRelation)
- val operationType = Alias(Literal(DELETE_OPERATION), OPERATION_COLUMN)()
- val requiredWriteAttrs = dedupAttrs(rowIdAttrs ++ metadataAttrs)
- val project = Project(operationType +: requiredWriteAttrs, deletedRowsPlan)
-
- // build a plan to write deletes to the table
- val writeRelation = relation.copy(table = operationTable)
- val projections = buildWriteDeltaProjections(project, Nil, rowIdAttrs, metadataAttrs)
- WriteDelta(writeRelation, project, relation, projections)
- }
-}
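One detail worth noting in the deleted DELETE rewrite: the copy-on-write path keeps surviving rows with Not(EqualNullSafe(cond, TrueLiteral)) rather than Not(cond), so rows where the delete condition evaluates to NULL are copied over instead of being dropped. A small sketch of that three-valued-logic distinction in plain Scala, with Option[Boolean] standing in for SQL booleans:

    object RemainingRowsFilterSketch {
      // SQL three-valued logic over Option[Boolean]: None models NULL.
      def not(v: Option[Boolean]): Option[Boolean] = v.map(!_)
      def equalNullSafe(a: Option[Boolean], b: Option[Boolean]): Option[Boolean] = Some(a == b)

      def main(args: Array[String]): Unit = {
        val deleteCond: Seq[Option[Boolean]] = Seq(Some(true), Some(false), None)

        // Filter used by the rewrite: NOT (cond <=> TRUE), which keeps the NULL row.
        val kept = deleteCond.filter(c => not(equalNullSafe(c, Some(true))).contains(true))
        println(kept) // List(Some(false), None)

        // A naive NOT cond would drop the NULL row, changing DELETE semantics.
        val naive = deleteCond.filter(c => not(c).contains(true))
        println(naive) // List(Some(false))
      }
    }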
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
deleted file mode 100644
index 31b2cfe05c48..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.ProjectingInternalRow
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.ExtendedV2ExpressionUtils
-import org.apache.spark.sql.catalyst.expressions.IsNotNull
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
-import org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID
-import org.apache.spark.sql.catalyst.plans.FullOuter
-import org.apache.spark.sql.catalyst.plans.Inner
-import org.apache.spark.sql.catalyst.plans.LeftAnti
-import org.apache.spark.sql.catalyst.plans.LeftOuter
-import org.apache.spark.sql.catalyst.plans.RightOuter
-import org.apache.spark.sql.catalyst.plans.logical.AppendData
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.HintInfo
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.JoinHint
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeAction
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.MergeRows
-import org.apache.spark.sql.catalyst.plans.logical.NO_BROADCAST_HASH
-import org.apache.spark.sql.catalyst.plans.logical.NoStatsUnaryNode
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.NamedReference
-import org.apache.spark.sql.connector.iceberg.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command.MERGE
-import org.apache.spark.sql.connector.iceberg.write.SupportsDelta
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.types.StructField
-import org.apache.spark.sql.types.StructType
-
-/**
- * Assigns a rewrite plan for v2 tables that support rewriting data to handle MERGE statements.
- *
- * This rule assumes the commands have been fully resolved and all assignments have been aligned.
- * That's why it must be run after AlignRowLevelCommandAssignments.
- */
-object RewriteMergeIntoTable extends RewriteRowLevelCommand {
-
- private final val ROW_FROM_SOURCE = "__row_from_source"
- private final val ROW_FROM_TARGET = "__row_from_target"
- private final val ROW_ID = "__row_id"
-
- private final val ROW_FROM_SOURCE_REF = FieldReference(ROW_FROM_SOURCE)
- private final val ROW_FROM_TARGET_REF = FieldReference(ROW_FROM_TARGET)
- private final val ROW_ID_REF = FieldReference(ROW_ID)
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case m @ MergeIntoIcebergTable(aliasedTable, source, cond, matchedActions, notMatchedActions, None)
- if m.resolved && m.aligned && matchedActions.isEmpty && notMatchedActions.size == 1 =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r: DataSourceV2Relation =>
- // NOT MATCHED conditions may only refer to columns in source so they can be pushed down
- val insertAction = notMatchedActions.head.asInstanceOf[InsertAction]
- val filteredSource = insertAction.condition match {
- case Some(insertCond) => Filter(insertCond, source)
- case None => source
- }
-
- // when there are no MATCHED actions, use a left anti join to remove any matching rows
- // and switch to using a regular append instead of a row-level merge
- // only unmatched source rows that match the condition are appended to the table
- val joinPlan = Join(filteredSource, r, LeftAnti, Some(cond), JoinHint.NONE)
-
- val outputExprs = insertAction.assignments.map(_.value)
- val outputColNames = r.output.map(_.name)
- val outputCols = outputExprs.zip(outputColNames).map { case (expr, name) =>
- Alias(expr, name)()
- }
- val project = Project(outputCols, joinPlan)
-
- AppendData.byPosition(r, project)
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
-
- case m @ MergeIntoIcebergTable(aliasedTable, source, cond, matchedActions, notMatchedActions, None)
- if m.resolved && m.aligned && matchedActions.isEmpty =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r: DataSourceV2Relation =>
- // when there are no MATCHED actions, use a left anti join to remove any matching rows
- // and switch to using a regular append instead of a row-level merge
- // only unmatched source rows that match action conditions are appended to the table
- val joinPlan = Join(source, r, LeftAnti, Some(cond), JoinHint.NONE)
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map(actionOutput(_, Nil))
-
- // merge rows as there are multiple not matched actions
- val mergeRows = MergeRows(
- isSourceRowPresent = TrueLiteral,
- isTargetRowPresent = FalseLiteral,
- matchedConditions = Nil,
- matchedOutputs = Nil,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- targetOutput = Nil,
- rowIdAttrs = Nil,
- performCardinalityCheck = false,
- emitNotMatchedTargetRows = false,
- output = buildMergeRowsOutput(Nil, notMatchedOutputs, r.output),
- joinPlan)
-
- AppendData.byPosition(r, mergeRows)
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
-
- case m @ MergeIntoIcebergTable(aliasedTable, source, cond, matchedActions, notMatchedActions, None)
- if m.resolved && m.aligned =>
-
- EliminateSubqueryAliases(aliasedTable) match {
- case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) =>
- val operation = buildRowLevelOperation(tbl, MERGE)
- val table = RowLevelOperationTable(tbl, operation)
- val rewritePlan = operation match {
- case _: SupportsDelta =>
- buildWriteDeltaPlan(r, table, source, cond, matchedActions, notMatchedActions)
- case _ =>
- buildReplaceDataPlan(r, table, source, cond, matchedActions, notMatchedActions)
- }
-
- m.copy(rewritePlan = Some(rewritePlan))
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- private def buildReplaceDataPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- source: LogicalPlan,
- cond: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction]): ReplaceData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs)
- val readAttrs = readRelation.output
-
- // project an extra column to check if a target row exists after the join
- // project a synthetic row ID to perform the cardinality check
- val rowFromTarget = Alias(TrueLiteral, ROW_FROM_TARGET)()
- val rowId = Alias(MonotonicallyIncreasingID(), ROW_ID)()
- val targetTableProjExprs = readAttrs ++ Seq(rowFromTarget, rowId)
- val targetTableProj = Project(targetTableProjExprs, readRelation)
-
- // project an extra column to check if a source row exists after the join
- val rowFromSource = Alias(TrueLiteral, ROW_FROM_SOURCE)()
- val sourceTableProjExprs = source.output :+ rowFromSource
- val sourceTableProj = Project(sourceTableProjExprs, source)
-
- // use left outer join if there is no NOT MATCHED action, unmatched source rows can be discarded
- // use full outer join in all other cases, unmatched source rows may be needed
- // disable broadcasts for the target table to perform the cardinality check
- val joinType = if (notMatchedActions.isEmpty) LeftOuter else FullOuter
- val joinHint = JoinHint(leftHint = Some(HintInfo(Some(NO_BROADCAST_HASH))), rightHint = None)
- val joinPlan = Join(NoStatsUnaryNode(targetTableProj), sourceTableProj, joinType, Some(cond), joinHint)
-
- // add an extra matched action to output the original row if none of the actual actions matched
- // this is needed to keep target rows that should be copied over
- val matchedConditions = matchedActions.map(actionCondition) :+ TrueLiteral
- val matchedOutputs = matchedActions.map(actionOutput(_, metadataAttrs)) :+ readAttrs
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map(actionOutput(_, metadataAttrs))
-
- val rowIdAttr = resolveAttrRef(ROW_ID_REF, joinPlan)
- val rowFromSourceAttr = resolveAttrRef(ROW_FROM_SOURCE_REF, joinPlan)
- val rowFromTargetAttr = resolveAttrRef(ROW_FROM_TARGET_REF, joinPlan)
-
- val mergeRows = MergeRows(
- isSourceRowPresent = IsNotNull(rowFromSourceAttr),
- isTargetRowPresent = if (notMatchedActions.isEmpty) TrueLiteral else IsNotNull(rowFromTargetAttr),
- matchedConditions = matchedConditions,
- matchedOutputs = matchedOutputs,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- targetOutput = readAttrs,
- rowIdAttrs = Seq(rowIdAttr),
- performCardinalityCheck = isCardinalityCheckNeeded(matchedActions),
- emitNotMatchedTargetRows = true,
- output = buildMergeRowsOutput(matchedOutputs, notMatchedOutputs, readAttrs),
- joinPlan)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceData(writeRelation, mergeRows, relation)
- }
-
- // build a rewrite plan for sources that support row deltas
- private def buildWriteDeltaPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- source: LogicalPlan,
- cond: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction]): WriteDelta = {
-
- // resolve all needed attrs (e.g. row ID and any required metadata attrs)
- val rowAttrs = relation.output
- val rowIdAttrs = resolveRowIdAttrs(relation, operationTable.operation)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs, rowIdAttrs)
- val readAttrs = readRelation.output
-
- // project an extra column to check if a target row exists after the join
- val targetTableProjExprs = readAttrs :+ Alias(TrueLiteral, ROW_FROM_TARGET)()
- val targetTableProj = Project(targetTableProjExprs, readRelation)
-
- // project an extra column to check if a source row exists after the join
- val sourceTableProjExprs = source.output :+ Alias(TrueLiteral, ROW_FROM_SOURCE)()
- val sourceTableProj = Project(sourceTableProjExprs, source)
-
- // use inner join if there is no NOT MATCHED action, unmatched source rows can be discarded
- // use right outer join in all other cases, unmatched source rows may be needed
- // also disable broadcasts for the target table to perform the cardinality check
- val joinType = if (notMatchedActions.isEmpty) Inner else RightOuter
- val joinHint = JoinHint(leftHint = Some(HintInfo(Some(NO_BROADCAST_HASH))), rightHint = None)
- val joinPlan = Join(NoStatsUnaryNode(targetTableProj), sourceTableProj, joinType, Some(cond), joinHint)
-
- val deleteRowValues = buildDeltaDeleteRowValues(rowAttrs, rowIdAttrs)
- val metadataReadAttrs = readAttrs.filterNot(relation.outputSet.contains)
-
- val matchedConditions = matchedActions.map(actionCondition)
- val matchedOutputs = matchedActions.map(deltaActionOutput(_, deleteRowValues, metadataReadAttrs))
-
- val notMatchedConditions = notMatchedActions.map(actionCondition)
- val notMatchedOutputs = notMatchedActions.map(deltaActionOutput(_, deleteRowValues, metadataReadAttrs))
-
- val operationTypeAttr = AttributeReference(OPERATION_COLUMN, IntegerType, nullable = false)()
- val rowFromSourceAttr = resolveAttrRef(ROW_FROM_SOURCE_REF, joinPlan)
- val rowFromTargetAttr = resolveAttrRef(ROW_FROM_TARGET_REF, joinPlan)
-
- // merged rows must contain values for the operation type and all read attrs
- val mergeRowsOutput = buildMergeRowsOutput(matchedOutputs, notMatchedOutputs, operationTypeAttr +: readAttrs)
-
- val mergeRows = MergeRows(
- isSourceRowPresent = IsNotNull(rowFromSourceAttr),
- isTargetRowPresent = if (notMatchedActions.isEmpty) TrueLiteral else IsNotNull(rowFromTargetAttr),
- matchedConditions = matchedConditions,
- matchedOutputs = matchedOutputs,
- notMatchedConditions = notMatchedConditions,
- notMatchedOutputs = notMatchedOutputs,
- // only needed if emitting unmatched target rows
- targetOutput = Nil,
- rowIdAttrs = rowIdAttrs,
- performCardinalityCheck = isCardinalityCheckNeeded(matchedActions),
- emitNotMatchedTargetRows = false,
- output = mergeRowsOutput,
- joinPlan)
-
- // build a plan to write the row delta to the table
- val writeRelation = relation.copy(table = operationTable)
- val projections = buildMergeDeltaProjections(mergeRows, rowAttrs, rowIdAttrs, metadataAttrs)
- WriteDelta(writeRelation, mergeRows, relation, projections)
- }
-
- private def actionCondition(action: MergeAction): Expression = {
- action.condition.getOrElse(TrueLiteral)
- }
-
- private def actionOutput(
- clause: MergeAction,
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
-
- clause match {
- case u: UpdateAction =>
- u.assignments.map(_.value) ++ metadataAttrs
-
- case _: DeleteAction =>
- Nil
-
- case i: InsertAction =>
- i.assignments.map(_.value) ++ metadataAttrs.map(attr => Literal(null, attr.dataType))
-
- case other =>
- throw new AnalysisException(s"Unexpected action: $other")
- }
- }
-
- private def deltaActionOutput(
- action: MergeAction,
- deleteRowValues: Seq[Expression],
- metadataAttrs: Seq[Attribute]): Seq[Expression] = {
-
- action match {
- case u: UpdateAction =>
- Seq(Literal(UPDATE_OPERATION)) ++ u.assignments.map(_.value) ++ metadataAttrs
-
- case _: DeleteAction =>
- Seq(Literal(DELETE_OPERATION)) ++ deleteRowValues ++ metadataAttrs
-
- case i: InsertAction =>
- val metadataAttrValues = metadataAttrs.map(attr => Literal(null, attr.dataType))
- Seq(Literal(INSERT_OPERATION)) ++ i.assignments.map(_.value) ++ metadataAttrValues
-
- case other =>
- throw new AnalysisException(s"Unexpected action: $other")
- }
- }
-
- private def buildMergeRowsOutput(
- matchedOutputs: Seq[Seq[Expression]],
- notMatchedOutputs: Seq[Seq[Expression]],
- attrs: Seq[Attribute]): Seq[Attribute] = {
-
- // collect all outputs from matched and not matched actions (ignoring DELETEs)
- val outputs = matchedOutputs.filter(_.nonEmpty) ++ notMatchedOutputs.filter(_.nonEmpty)
-
- // build a correct nullability map for output attributes
- // an attribute is nullable if at least one matched or not matched action may produce null
- val nullabilityMap = attrs.indices.map { index =>
- index -> outputs.exists(output => output(index).nullable)
- }.toMap
-
- attrs.zipWithIndex.map { case (attr, index) =>
- AttributeReference(attr.name, attr.dataType, nullabilityMap(index), attr.metadata)()
- }
- }
-
- private def isCardinalityCheckNeeded(actions: Seq[MergeAction]): Boolean = actions match {
- case Seq(DeleteAction(None)) => false
- case _ => true
- }
-
- private def buildDeltaDeleteRowValues(
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute]): Seq[Expression] = {
-
- // nullify all row attrs that are not part of the row ID
- val rowIdAttSet = AttributeSet(rowIdAttrs)
- rowAttrs.map {
- case attr if rowIdAttSet.contains(attr) => attr
- case attr => Literal(null, attr.dataType)
- }
- }
-
- private def resolveAttrRef(ref: NamedReference, plan: LogicalPlan): AttributeReference = {
- ExtendedV2ExpressionUtils.resolveRef[AttributeReference](ref, plan)
- }
-
- private def buildMergeDeltaProjections(
- mergeRows: MergeRows,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val outputAttrs = mergeRows.output
-
- val outputs = mergeRows.matchedOutputs ++ mergeRows.notMatchedOutputs
- val insertAndUpdateOutputs = outputs.filterNot(_.head == Literal(DELETE_OPERATION))
- val updateAndDeleteOutputs = outputs.filterNot(_.head == Literal(INSERT_OPERATION))
-
- val rowProjection = if (rowAttrs.nonEmpty) {
- Some(newLazyProjection(insertAndUpdateOutputs, outputAttrs, rowAttrs))
- } else {
- None
- }
-
- val rowIdProjection = newLazyProjection(updateAndDeleteOutputs, outputAttrs, rowIdAttrs)
-
- val metadataProjection = if (metadataAttrs.nonEmpty) {
- Some(newLazyProjection(updateAndDeleteOutputs, outputAttrs, metadataAttrs))
- } else {
- None
- }
-
- WriteDeltaProjections(rowProjection, rowIdProjection, metadataProjection)
- }
-
- // the projection is done by name, ignoring expr IDs
- private def newLazyProjection(
- outputs: Seq[Seq[Expression]],
- outputAttrs: Seq[Attribute],
- projectedAttrs: Seq[Attribute]): ProjectingInternalRow = {
-
- val projectedOrdinals = projectedAttrs.map(attr => outputAttrs.indexWhere(_.name == attr.name))
-
- val structFields = projectedAttrs.zip(projectedOrdinals).map { case (attr, ordinal) =>
- // output attr is nullable if at least one action may produce null for that attr
- // but row ID and metadata attrs are projected only in update/delete actions and
- // row attrs are projected only in insert/update actions
- // that's why the projection schema must rely only on relevant action outputs
- // instead of blindly inheriting the output attr nullability
- val nullable = outputs.exists(output => output(ordinal).nullable)
- StructField(attr.name, attr.dataType, nullable, attr.metadata)
- }
- val schema = StructType(structFields)
-
- ProjectingInternalRow(schema, projectedOrdinals)
- }
-}
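
For illustration, the removed rewrite above detects matches by projecting literal TRUE marker columns on both join sides and testing them for null after the (right outer) join. Below is a minimal standalone Spark sketch of that marker-column trick; the table contents and the __from_* column names are invented for this example and are not Iceberg's internal constants.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, lit}

object MarkerJoinSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("marker-join-sketch").getOrCreate()
    import spark.implicits._

    // toy "target" and "source" tables; the literal-TRUE columns play the role of the
    // ROW_FROM_TARGET / ROW_FROM_SOURCE markers in the deleted rule
    val target = Seq((1, "a"), (2, "b")).toDF("id", "data").withColumn("__from_target", lit(true))
    val source = Seq((2, "b2"), (3, "c")).toDF("id", "data").withColumn("__from_source", lit(true))

    // a right outer join keeps unmatched source rows (needed when NOT MATCHED actions exist)
    val joined = target.join(source, target("id") === source("id"), "right_outer")

    // a null marker after the join means that side had no matching row
    joined.withColumn("target_row_exists", col("__from_target").isNotNull).show(false)

    spark.stop()
  }
}
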
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala
deleted file mode 100644
index f3f655181417..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.ProjectingInternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.ExprId
-import org.apache.spark.sql.catalyst.expressions.ExtendedV2ExpressionUtils
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.iceberg.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command
-import org.apache.spark.sql.connector.iceberg.write.SupportsDelta
-import org.apache.spark.sql.connector.write.RowLevelOperationInfoImpl
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-import scala.collection.mutable
-
-trait RewriteRowLevelCommand extends Rule[LogicalPlan] {
-
- protected def buildRowLevelOperation(
- table: SupportsRowLevelOperations,
- command: Command): RowLevelOperation = {
- val info = RowLevelOperationInfoImpl(command, CaseInsensitiveStringMap.empty())
- val builder = table.newRowLevelOperationBuilder(info)
- builder.build()
- }
-
- protected def buildReadRelation(
- relation: DataSourceV2Relation,
- table: RowLevelOperationTable,
- metadataAttrs: Seq[AttributeReference],
- rowIdAttrs: Seq[AttributeReference] = Nil): DataSourceV2Relation = {
-
- val attrs = dedupAttrs(relation.output ++ rowIdAttrs ++ metadataAttrs)
- relation.copy(table = table, output = attrs)
- }
-
- protected def dedupAttrs(attrs: Seq[AttributeReference]): Seq[AttributeReference] = {
- val exprIds = mutable.Set.empty[ExprId]
- attrs.flatMap { attr =>
- if (exprIds.contains(attr.exprId)) {
- None
- } else {
- exprIds += attr.exprId
- Some(attr)
- }
- }
- }
-
- protected def buildWriteDeltaProjections(
- plan: LogicalPlan,
- rowAttrs: Seq[Attribute],
- rowIdAttrs: Seq[Attribute],
- metadataAttrs: Seq[Attribute]): WriteDeltaProjections = {
-
- val rowProjection = if (rowAttrs.nonEmpty) {
- Some(newLazyProjection(plan, rowAttrs))
- } else {
- None
- }
-
- val rowIdProjection = newLazyProjection(plan, rowIdAttrs)
-
- val metadataProjection = if (metadataAttrs.nonEmpty) {
- Some(newLazyProjection(plan, metadataAttrs))
- } else {
- None
- }
-
- WriteDeltaProjections(rowProjection, rowIdProjection, metadataProjection)
- }
-
- // the projection is done by name, ignoring expr IDs
- private def newLazyProjection(
- plan: LogicalPlan,
- projectedAttrs: Seq[Attribute]): ProjectingInternalRow = {
-
- val projectedOrdinals = projectedAttrs.map(attr => plan.output.indexWhere(_.name == attr.name))
- val schema = StructType.fromAttributes(projectedOrdinals.map(plan.output(_)))
- ProjectingInternalRow(schema, projectedOrdinals)
- }
-
- protected def resolveRequiredMetadataAttrs(
- relation: DataSourceV2Relation,
- operation: RowLevelOperation): Seq[AttributeReference] = {
-
- ExtendedV2ExpressionUtils.resolveRefs[AttributeReference](
- operation.requiredMetadataAttributes.toSeq,
- relation)
- }
-
- protected def resolveRowIdAttrs(
- relation: DataSourceV2Relation,
- operation: RowLevelOperation): Seq[AttributeReference] = {
-
- operation match {
- case supportsDelta: SupportsDelta =>
- val rowIdAttrs = ExtendedV2ExpressionUtils.resolveRefs[AttributeReference](
- supportsDelta.rowId.toSeq,
- relation)
-
- val nullableRowIdAttrs = rowIdAttrs.filter(_.nullable)
- if (nullableRowIdAttrs.nonEmpty) {
- throw new AnalysisException(s"Row ID attrs cannot be nullable: $nullableRowIdAttrs")
- }
-
- rowIdAttrs
-
- case other =>
- throw new AnalysisException(s"Operation $other does not support deltas")
- }
- }
-}
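
The dedupAttrs helper in the removed trait keeps only the first attribute seen per expression ID. A small standalone sketch of the same first-occurrence de-duplication, using plain case classes instead of Catalyst attributes (all names here are illustrative):

import scala.collection.mutable

object DedupSketch {
  // stand-in for an attribute identified by an expression ID
  case class Attr(exprId: Long, name: String)

  // keep the first occurrence per key, as dedupAttrs does with exprIds
  def dedupByKey[A, K](items: Seq[A])(key: A => K): Seq[A] = {
    val seen = mutable.Set.empty[K]
    items.flatMap(item => if (seen.add(key(item))) Some(item) else None)
  }

  def main(args: Array[String]): Unit = {
    val attrs = Seq(Attr(1, "id"), Attr(2, "data"), Attr(2, "data"), Attr(3, "_file"))
    assert(dedupByKey(attrs)(_.exprId) == Seq(Attr(1, "id"), Attr(2, "data"), Attr(3, "_file")))
  }
}
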
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
deleted file mode 100644
index 110b38d314b7..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Alias
-import org.apache.spark.sql.catalyst.expressions.EqualNullSafe
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.Union
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.connector.iceberg.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command.UPDATE
-import org.apache.spark.sql.connector.iceberg.write.SupportsDelta
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-
-/**
- * Assigns a rewrite plan for v2 tables that support rewriting data to handle UPDATE statements.
- *
- * This rule assumes the commands have been fully resolved and all assignments have been aligned.
- * That's why it must be run after AlignRowLevelCommandAssignments.
- *
- * This rule also must be run in the same batch with DeduplicateRelations in Spark.
- */
-object RewriteUpdateTable extends RewriteRowLevelCommand {
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case u @ UpdateIcebergTable(aliasedTable, assignments, cond, None) if u.resolved && u.aligned =>
- EliminateSubqueryAliases(aliasedTable) match {
- case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) =>
- val operation = buildRowLevelOperation(tbl, UPDATE)
- val table = RowLevelOperationTable(tbl, operation)
- val updateCond = cond.getOrElse(Literal.TrueLiteral)
- val rewritePlan = operation match {
- case _: SupportsDelta =>
- buildWriteDeltaPlan(r, table, assignments, updateCond)
- case _ if SubqueryExpression.hasSubquery(updateCond) =>
- buildReplaceDataWithUnionPlan(r, table, assignments, updateCond)
- case _ =>
- buildReplaceDataPlan(r, table, assignments, updateCond)
- }
- UpdateIcebergTable(r, assignments, cond, Some(rewritePlan))
-
- case p =>
- throw new AnalysisException(s"$p is not an Iceberg table")
- }
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- // if the condition does NOT contain a subquery
- private def buildReplaceDataPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): ReplaceData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs)
-
- // build a plan with updated and copied over records
- val updatedAndRemainingRowsPlan = buildUpdateProjection(readRelation, assignments, cond)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceData(writeRelation, updatedAndRemainingRowsPlan, relation)
- }
-
- // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
- // if the condition contains a subquery
- private def buildReplaceDataWithUnionPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): ReplaceData = {
-
- // resolve all needed attrs (e.g. metadata attrs for grouping data on write)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a read relation and include all required metadata columns
- // the same read relation will be used to read the records that must be updated and the records copied over
- // DeduplicateRelations will take care of duplicated attr IDs
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs)
-
- // build a plan for records that match the cond and should be updated
- val matchedRowsPlan = Filter(cond, readRelation)
- val updatedRowsPlan = buildUpdateProjection(matchedRowsPlan, assignments)
-
- // build a plan for records that did not match the cond but had to be copied over
- val remainingRowFilter = Not(EqualNullSafe(cond, Literal.TrueLiteral))
- val remainingRowsPlan = Filter(remainingRowFilter, readRelation)
-
- // new state is a union of updated and copied over records
- val updatedAndRemainingRowsPlan = Union(updatedRowsPlan, remainingRowsPlan)
-
- // build a plan to replace read groups in the table
- val writeRelation = relation.copy(table = operationTable)
- ReplaceData(writeRelation, updatedAndRemainingRowsPlan, relation)
- }
-
- // build a rewrite plan for sources that support row deltas
- private def buildWriteDeltaPlan(
- relation: DataSourceV2Relation,
- operationTable: RowLevelOperationTable,
- assignments: Seq[Assignment],
- cond: Expression): WriteDelta = {
-
- // resolve all needed attrs (e.g. row ID and any required metadata attrs)
- val rowAttrs = relation.output
- val rowIdAttrs = resolveRowIdAttrs(relation, operationTable.operation)
- val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)
-
- // construct a scan relation and include all required metadata columns
- val readRelation = buildReadRelation(relation, operationTable, metadataAttrs, rowIdAttrs)
-
- // build a plan for updated records that match the cond
- val matchedRowsPlan = Filter(cond, readRelation)
- val updatedRowsPlan = buildUpdateProjection(matchedRowsPlan, assignments)
- val operationType = Alias(Literal(UPDATE_OPERATION), OPERATION_COLUMN)()
- val project = Project(operationType +: updatedRowsPlan.output, updatedRowsPlan)
-
- // build a plan to write the row delta to the table
- val writeRelation = relation.copy(table = operationTable)
- val projections = buildWriteDeltaProjections(project, rowAttrs, rowIdAttrs, metadataAttrs)
- WriteDelta(writeRelation, project, relation, projections)
- }
-
- // this method assumes the assignments have already been aligned
- // the condition passed to this method may be different from the UPDATE condition
- private def buildUpdateProjection(
- plan: LogicalPlan,
- assignments: Seq[Assignment],
- cond: Expression = Literal.TrueLiteral): LogicalPlan = {
-
- // TODO: avoid executing the condition for each column
-
- // the plan output may include metadata columns that are not modified
- // that's why the number of assignments may not match the number of plan output columns
-
- val assignedValues = assignments.map(_.value)
- val updatedValues = plan.output.zipWithIndex.map { case (attr, index) =>
- if (index < assignments.size) {
- val assignedExpr = assignedValues(index)
- val updatedValue = If(cond, assignedExpr, attr)
- Alias(updatedValue, attr.name)()
- } else {
- attr
- }
- }
-
- Project(updatedValues, plan)
- }
-}
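
The group-replacing paths above rewrite UPDATE into a projection in which each assigned column becomes IF(cond, newValue, oldValue), so rows that do not match the condition are copied over unchanged. A minimal standalone Spark sketch of that idea, with invented table and column names:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, lit, when}

object UpdateAsProjectionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("update-projection-sketch").getOrCreate()
    import spark.implicits._

    val table = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "data")
    val cond = col("id") === 2

    // the assigned column is wrapped in IF(cond, assignedValue, originalValue); others pass through
    val rewritten = table.select(col("id"), when(cond, lit("updated")).otherwise(col("data")).as("data"))
    rewritten.show(false)

    spark.stop()
  }
}
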
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala
deleted file mode 100644
index ce3818922c78..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/AssignmentUtils.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.plans.logical.Assignment
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.types.DataType
-
-object AssignmentUtils extends SQLConfHelper {
-
- /**
- * Checks whether assignments are aligned and match table columns.
- *
- * @param table a target table
- * @param assignments assignments to check
- * @return true if the assignments are aligned
- */
- def aligned(table: LogicalPlan, assignments: Seq[Assignment]): Boolean = {
- val sameSize = table.output.size == assignments.size
- sameSize && table.output.zip(assignments).forall { case (attr, assignment) =>
- val key = assignment.key
- val value = assignment.value
- val refsEqual = toAssignmentRef(attr).zip(toAssignmentRef(key))
- .forall{ case (attrRef, keyRef) => conf.resolver(attrRef, keyRef)}
-
- refsEqual &&
- DataType.equalsIgnoreCompatibleNullability(value.dataType, attr.dataType) &&
- (attr.nullable || !value.nullable)
- }
- }
-
- def toAssignmentRef(expr: Expression): Seq[String] = expr match {
- case attr: AttributeReference =>
- Seq(attr.name)
- case Alias(child, _) =>
- toAssignmentRef(child)
- case GetStructField(child, _, Some(name)) =>
- toAssignmentRef(child) :+ name
- case other: ExtractValue =>
- throw new AnalysisException(s"Updating nested fields is only supported for structs: $other")
- case other =>
- throw new AnalysisException(s"Cannot convert to a reference, unsupported expression: $other")
- }
-
- def handleCharVarcharLimits(assignment: Assignment): Assignment = {
- val key = assignment.key
- val value = assignment.value
-
- val rawKeyType = key.transform {
- case attr: AttributeReference =>
- CharVarcharUtils.getRawType(attr.metadata)
- .map(attr.withDataType)
- .getOrElse(attr)
- }.dataType
-
- if (CharVarcharUtils.hasCharVarchar(rawKeyType)) {
- val newKey = key.transform {
- case attr: AttributeReference => CharVarcharUtils.cleanAttrMetadata(attr)
- }
- val newValue = CharVarcharUtils.stringLengthCheck(value, rawKeyType)
- Assignment(newKey, newValue)
- } else {
- assignment
- }
- }
-}
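
The aligned check above verifies that there is exactly one assignment per output column, matched by name through the session resolver, with a compatible type and nullability. A simplified standalone sketch of the same shape (it skips the data-type comparison and uses plain case-insensitive name matching; all names are illustrative):

object AlignmentSketch {
  case class Column(name: String, nullable: Boolean)
  case class Assignment(key: String, valueNullable: Boolean)

  // one assignment per column, matched by name, and no nullable value assigned to a non-null column
  def aligned(columns: Seq[Column], assignments: Seq[Assignment]): Boolean =
    columns.size == assignments.size &&
      columns.zip(assignments).forall { case (c, a) =>
        c.name.equalsIgnoreCase(a.key) && (c.nullable || !a.valueNullable)
      }

  def main(args: Array[String]): Unit = {
    assert(aligned(
      Seq(Column("id", nullable = false), Column("data", nullable = true)),
      Seq(Assignment("id", valueNullable = false), Assignment("data", valueNullable = true))))
    // a possibly-null value assigned to a non-null column is rejected
    assert(!aligned(Seq(Column("id", nullable = false)), Seq(Assignment("id", valueNullable = true))))
  }
}
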
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtendedV2ExpressionUtils.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtendedV2ExpressionUtils.scala
deleted file mode 100644
index b872281d7f21..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtendedV2ExpressionUtils.scala
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.connector.expressions.{Expression => V2Expression}
-import org.apache.spark.sql.connector.expressions.{SortDirection => V2SortDirection}
-import org.apache.spark.sql.connector.expressions.{NullOrdering => V2NullOrdering}
-import org.apache.spark.sql.connector.expressions.BucketTransform
-import org.apache.spark.sql.connector.expressions.DaysTransform
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.HoursTransform
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.MonthsTransform
-import org.apache.spark.sql.connector.expressions.NamedReference
-import org.apache.spark.sql.connector.expressions.SortValue
-import org.apache.spark.sql.connector.expressions.TruncateTransform
-import org.apache.spark.sql.connector.expressions.YearsTransform
-import org.apache.spark.sql.errors.QueryCompilationErrors
-
-/**
- * A class that is inspired by V2ExpressionUtils in Spark but supports Iceberg transforms.
- */
-object ExtendedV2ExpressionUtils extends SQLConfHelper {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
-
- def resolveRef[T <: NamedExpression](ref: NamedReference, plan: LogicalPlan): T = {
- plan.resolve(ref.fieldNames.toSeq, conf.resolver) match {
- case Some(namedExpr) =>
- namedExpr.asInstanceOf[T]
- case None =>
- val name = ref.fieldNames.toSeq.quoted
- val outputString = plan.output.map(_.name).mkString(",")
- throw QueryCompilationErrors.cannotResolveAttributeError(name, outputString)
- }
- }
-
- def resolveRefs[T <: NamedExpression](refs: Seq[NamedReference], plan: LogicalPlan): Seq[T] = {
- refs.map(ref => resolveRef[T](ref, plan))
- }
-
- def toCatalyst(expr: V2Expression, query: LogicalPlan): Expression = {
- expr match {
- case SortValue(child, direction, nullOrdering) =>
- val catalystChild = toCatalyst(child, query)
- SortOrder(catalystChild, toCatalyst(direction), toCatalyst(nullOrdering), Seq.empty)
- case IdentityTransform(ref) =>
- resolveRef[NamedExpression](ref, query)
- case BucketTransform(numBuckets, ref) =>
- IcebergBucketTransform(numBuckets, resolveRef[NamedExpression](ref, query))
- case TruncateTransform(length, ref) =>
- IcebergTruncateTransform(resolveRef[NamedExpression](ref, query), length)
- case YearsTransform(ref) =>
- IcebergYearTransform(resolveRef[NamedExpression](ref, query))
- case MonthsTransform(ref) =>
- IcebergMonthTransform(resolveRef[NamedExpression](ref, query))
- case DaysTransform(ref) =>
- IcebergDayTransform(resolveRef[NamedExpression](ref, query))
- case HoursTransform(ref) =>
- IcebergHourTransform(resolveRef[NamedExpression](ref, query))
- case ref: FieldReference =>
- resolveRef[NamedExpression](ref, query)
- case _ =>
- throw new AnalysisException(s"$expr is not currently supported")
- }
- }
-
- private def toCatalyst(direction: V2SortDirection): SortDirection = direction match {
- case V2SortDirection.ASCENDING => Ascending
- case V2SortDirection.DESCENDING => Descending
- }
-
- private def toCatalyst(nullOrdering: V2NullOrdering): NullOrdering = nullOrdering match {
- case V2NullOrdering.NULLS_FIRST => NullsFirst
- case V2NullOrdering.NULLS_LAST => NullsLast
- }
-}
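
resolveRef above turns a connector NamedReference into the matching output attribute of a plan by name, raising an analysis error when nothing matches. A toy standalone version of that by-name lookup (case-insensitive, with invented types):

object ResolveRefSketch {
  case class Attr(name: String)

  def resolveRef(fieldName: String, output: Seq[Attr]): Attr =
    output.find(_.name.equalsIgnoreCase(fieldName)).getOrElse {
      throw new IllegalArgumentException(
        s"Cannot resolve '$fieldName' among [${output.map(_.name).mkString(", ")}]")
    }

  def main(args: Array[String]): Unit = {
    val output = Seq(Attr("id"), Attr("data"), Attr("_file"))
    assert(resolveRef("_FILE", output) == Attr("_file"))
  }
}
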
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala
deleted file mode 100644
index 4d369ca0793d..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedReplaceNullWithFalseInPredicate.scala
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.CaseWhen
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.In
-import org.apache.spark.sql.catalyst.expressions.InSet
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.Or
-import org.apache.spark.sql.catalyst.plans.logical.DeleteAction
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.InsertAction
-import org.apache.spark.sql.catalyst.plans.logical.InsertStarAction
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeAction
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateAction
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateStarAction
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.INSET
-import org.apache.spark.sql.catalyst.trees.TreePattern.NULL_LITERAL
-import org.apache.spark.sql.catalyst.trees.TreePattern.TRUE_OR_FALSE_LITERAL
-import org.apache.spark.sql.types.BooleanType
-import org.apache.spark.util.Utils
-
-/**
- * A rule similar to ReplaceNullWithFalseInPredicate in Spark but applies to Iceberg row-level commands.
- */
-object ExtendedReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
-
- def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
- _.containsAnyPattern(NULL_LITERAL, TRUE_OR_FALSE_LITERAL, INSET)) {
-
- case d @ DeleteFromIcebergTable(_, Some(cond), _) =>
- d.copy(condition = Some(replaceNullWithFalse(cond)))
-
- case u @ UpdateIcebergTable(_, _, Some(cond), _) =>
- u.copy(condition = Some(replaceNullWithFalse(cond)))
-
- case m @ MergeIntoIcebergTable(_, _, mergeCond, matchedActions, notMatchedActions, _) =>
- m.copy(
- mergeCondition = replaceNullWithFalse(mergeCond),
- matchedActions = replaceNullWithFalse(matchedActions),
- notMatchedActions = replaceNullWithFalse(notMatchedActions))
- }
-
- /**
- * Recursively traverse the Boolean-type expression to replace
- * `Literal(null, BooleanType)` with `FalseLiteral`, if possible.
- *
- * Note that `transformExpressionsDown` can not be used here as we must stop as soon as we hit
- * an expression that is not [[CaseWhen]], [[If]], [[And]], [[Or]] or
- * `Literal(null, BooleanType)`.
- */
- private def replaceNullWithFalse(e: Expression): Expression = e match {
- case Literal(null, BooleanType) =>
- FalseLiteral
- // In SQL, the `Not(IN)` expression evaluates as follows:
- // `NULL not in (1)` -> NULL
- // `NULL not in (1, NULL)` -> NULL
- // `1 not in (1, NULL)` -> false
- // `1 not in (2, NULL)` -> NULL
- // In predicate, NULL is equal to false, so we can simplify them to false directly.
- case Not(In(value, list)) if (value +: list).exists(isNullLiteral) =>
- FalseLiteral
- case Not(InSet(value, list)) if isNullLiteral(value) || list.contains(null) =>
- FalseLiteral
-
- case And(left, right) =>
- And(replaceNullWithFalse(left), replaceNullWithFalse(right))
- case Or(left, right) =>
- Or(replaceNullWithFalse(left), replaceNullWithFalse(right))
- case cw: CaseWhen if cw.dataType == BooleanType =>
- val newBranches = cw.branches.map { case (cond, value) =>
- replaceNullWithFalse(cond) -> replaceNullWithFalse(value)
- }
- val newElseValue = cw.elseValue.map(replaceNullWithFalse).getOrElse(FalseLiteral)
- CaseWhen(newBranches, newElseValue)
- case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType =>
- If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal))
- case e if e.dataType == BooleanType =>
- e
- case e =>
- val message = "Expected a Boolean type expression in replaceNullWithFalse, " +
- s"but got the type `${e.dataType.catalogString}` in `${e.sql}`."
- if (Utils.isTesting) {
- throw new IllegalArgumentException(message)
- } else {
- logWarning(message)
- e
- }
- }
-
- private def isNullLiteral(e: Expression): Boolean = e match {
- case Literal(null, _) => true
- case _ => false
- }
-
- private def replaceNullWithFalse(mergeActions: Seq[MergeAction]): Seq[MergeAction] = {
- mergeActions.map {
- case u @ UpdateAction(Some(cond), _) => u.copy(condition = Some(replaceNullWithFalse(cond)))
- case u @ UpdateStarAction(Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond)))
- case d @ DeleteAction(Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond)))
- case i @ InsertAction(Some(cond), _) => i.copy(condition = Some(replaceNullWithFalse(cond)))
- case i @ InsertStarAction(Some(cond)) => i.copy(condition = Some(replaceNullWithFalse(cond)))
- case other => other
- }
- }
-}
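
The core of the removed rule is a recursive rewrite that turns Literal(null, BooleanType) into false inside AND/OR trees and stops at anything else, since a NULL predicate filters a row out exactly like FALSE does. A tiny standalone model of that traversal over an invented expression ADT:

object NullToFalseSketch {
  sealed trait Expr
  case object FalseLit extends Expr
  case object NullLit extends Expr
  case class And(left: Expr, right: Expr) extends Expr
  case class Or(left: Expr, right: Expr) extends Expr
  case class Pred(sql: String) extends Expr // any leaf we must not rewrite into

  def replaceNullWithFalse(e: Expr): Expr = e match {
    case NullLit   => FalseLit
    case And(l, r) => And(replaceNullWithFalse(l), replaceNullWithFalse(r))
    case Or(l, r)  => Or(replaceNullWithFalse(l), replaceNullWithFalse(r))
    case other     => other // stop as soon as the node is not AND/OR/NULL, as the rule does
  }

  def main(args: Array[String]): Unit = {
    assert(replaceNullWithFalse(Or(NullLit, Pred("id = 1"))) == Or(FalseLit, Pred("id = 1")))
  }
}
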
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala
deleted file mode 100644
index 74cf922c4a98..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ExtendedSimplifyConditionalsInPredicate.scala
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.optimizer
-
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.CaseWhen
-import org.apache.spark.sql.catalyst.expressions.Coalesce
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.If
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
-import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
-import org.apache.spark.sql.catalyst.expressions.Not
-import org.apache.spark.sql.catalyst.expressions.Or
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.CASE_WHEN
-import org.apache.spark.sql.catalyst.trees.TreePattern.IF
-import org.apache.spark.sql.types.BooleanType
-
-/**
- * A rule similar to SimplifyConditionalsInPredicate in Spark but applies to Iceberg row-level commands.
- */
-object ExtendedSimplifyConditionalsInPredicate extends Rule[LogicalPlan] {
-
- def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
- _.containsAnyPattern(CASE_WHEN, IF)) {
-
- case d @ DeleteFromIcebergTable(_, Some(cond), _) =>
- d.copy(condition = Some(simplifyConditional(cond)))
-
- case u @ UpdateIcebergTable(_, _, Some(cond), _) =>
- u.copy(condition = Some(simplifyConditional(cond)))
-
- case m @ MergeIntoIcebergTable(_, _, mergeCond, matchedActions, notMatchedActions, _) =>
- m.copy(
- mergeCondition = simplifyConditional(mergeCond),
- matchedActions = simplifyConditional(matchedActions),
- notMatchedActions = simplifyConditional(notMatchedActions))
- }
-
- private def simplifyConditional(e: Expression): Expression = e match {
- case And(left, right) => And(simplifyConditional(left), simplifyConditional(right))
- case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right))
- case If(cond, trueValue, FalseLiteral) => And(cond, trueValue)
- case If(cond, trueValue, TrueLiteral) => Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue)
- case If(cond, FalseLiteral, falseValue) =>
- And(Not(Coalesce(Seq(cond, FalseLiteral))), falseValue)
- case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue)
- case CaseWhen(Seq((cond, trueValue)),
- Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) =>
- And(cond, trueValue)
- case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) =>
- Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue)
- case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) =>
- And(Not(Coalesce(Seq(cond, FalseLiteral))), elseValue)
- case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) =>
- Or(cond, elseValue)
- case e if e.dataType == BooleanType => e
- case e =>
- assert(e.dataType != BooleanType,
- "Expected a Boolean type expression in ExtendedSimplifyConditionalsInPredicate, " +
- s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.")
- e
- }
-
- private def simplifyConditional(mergeActions: Seq[MergeAction]): Seq[MergeAction] = {
- mergeActions.map {
- case u @ UpdateAction(Some(cond), _) => u.copy(condition = Some(simplifyConditional(cond)))
- case u @ UpdateStarAction(Some(cond)) => u.copy(condition = Some(simplifyConditional(cond)))
- case d @ DeleteAction(Some(cond)) => d.copy(condition = Some(simplifyConditional(cond)))
- case i @ InsertAction(Some(cond), _) => i.copy(condition = Some(simplifyConditional(cond)))
- case i @ InsertStarAction(Some(cond)) => i.copy(condition = Some(simplifyConditional(cond)))
- case other => other
- }
- }
-}
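
Two of the identities applied by the removed rule are easy to state on their own: in predicate position, IF(cond, v, FALSE) behaves like cond AND v, and IF(cond, TRUE, v) behaves like cond OR v. A standalone sketch over an invented ADT covering just those two cases:

object SimplifyConditionalSketch {
  sealed trait Expr
  case object TrueLit extends Expr
  case object FalseLit extends Expr
  case class Pred(sql: String) extends Expr
  case class If3(cond: Expr, trueValue: Expr, falseValue: Expr) extends Expr
  case class And(left: Expr, right: Expr) extends Expr
  case class Or(left: Expr, right: Expr) extends Expr

  def simplify(e: Expr): Expr = e match {
    case If3(cond, trueValue, FalseLit) => And(cond, trueValue) // IF(c, v, FALSE) => c AND v
    case If3(cond, TrueLit, falseValue) => Or(cond, falseValue) // IF(c, TRUE, v)  => c OR v
    case other                          => other
  }

  def main(args: Array[String]): Unit = {
    assert(simplify(If3(Pred("id = 1"), Pred("data = 'a'"), FalseLit)) ==
      And(Pred("id = 1"), Pred("data = 'a'")))
  }
}
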
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala
deleted file mode 100644
index 2996ceb366e1..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.parser.extensions
-
-import java.util.Locale
-import org.antlr.v4.runtime._
-import org.antlr.v4.runtime.atn.PredictionMode
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.misc.ParseCancellationException
-import org.antlr.v4.runtime.tree.TerminalNodeImpl
-import org.apache.iceberg.common.DynConstructors
-import org.apache.iceberg.spark.ExtendedParser
-import org.apache.iceberg.spark.ExtendedParser.RawOrderField
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.FunctionIdentifier
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser.NonReservedContext
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser.QuotedIdentifierContext
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoContext
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoTable
-import org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.UpdateTable
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.execution.command.ExplainCommand
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.VariableSubstitution
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.StructType
-import scala.jdk.CollectionConverters._
-import scala.util.Try
-
-class IcebergSparkSqlExtensionsParser(delegate: ParserInterface) extends ParserInterface with ExtendedParser {
-
- import IcebergSparkSqlExtensionsParser._
-
- private lazy val substitutor = substitutorCtor.newInstance(SQLConf.get)
- private lazy val astBuilder = new IcebergSqlExtensionsAstBuilder(delegate)
-
- /**
- * Parse a string to a DataType.
- */
- override def parseDataType(sqlText: String): DataType = {
- delegate.parseDataType(sqlText)
- }
-
- /**
- * Parse a string to a raw DataType without CHAR/VARCHAR replacement.
- */
- def parseRawDataType(sqlText: String): DataType = throw new UnsupportedOperationException()
-
- /**
- * Parse a string to an Expression.
- */
- override def parseExpression(sqlText: String): Expression = {
- delegate.parseExpression(sqlText)
- }
-
- /**
- * Parse a string to a TableIdentifier.
- */
- override def parseTableIdentifier(sqlText: String): TableIdentifier = {
- delegate.parseTableIdentifier(sqlText)
- }
-
- /**
- * Parse a string to a FunctionIdentifier.
- */
- override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = {
- delegate.parseFunctionIdentifier(sqlText)
- }
-
- /**
- * Parse a string to a multi-part identifier.
- */
- override def parseMultipartIdentifier(sqlText: String): Seq[String] = {
- delegate.parseMultipartIdentifier(sqlText)
- }
-
- /**
- * Creates a StructType for a given SQL string, which is a comma-separated list of field
- * definitions that will preserve the correct Hive metadata.
- */
- override def parseTableSchema(sqlText: String): StructType = {
- delegate.parseTableSchema(sqlText)
- }
-
- override def parseSortOrder(sqlText: String): java.util.List[RawOrderField] = {
- val fields = parse(sqlText) { parser => astBuilder.visitSingleOrder(parser.singleOrder()) }
- fields.map { field =>
- val (term, direction, order) = field
- new RawOrderField(term, direction, order)
- }.asJava
- }
-
- /**
- * Parse a string to a LogicalPlan.
- */
- override def parsePlan(sqlText: String): LogicalPlan = {
- val sqlTextAfterSubstitution = substitutor.substitute(sqlText)
- if (isIcebergCommand(sqlTextAfterSubstitution)) {
- parse(sqlTextAfterSubstitution) { parser => astBuilder.visit(parser.singleStatement()) }.asInstanceOf[LogicalPlan]
- } else {
- val parsedPlan = delegate.parsePlan(sqlText)
- parsedPlan match {
- case e: ExplainCommand =>
- e.copy(logicalPlan = replaceRowLevelCommands(e.logicalPlan))
- case p =>
- replaceRowLevelCommands(p)
- }
- }
- }
-
- private def replaceRowLevelCommands(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsDown {
- case DeleteFromTable(UnresolvedIcebergTable(aliasedTable), condition) =>
- DeleteFromIcebergTable(aliasedTable, condition)
-
- case UpdateTable(UnresolvedIcebergTable(aliasedTable), assignments, condition) =>
- UpdateIcebergTable(aliasedTable, assignments, condition)
-
- case MergeIntoTable(UnresolvedIcebergTable(aliasedTable), source, cond, matchedActions, notMatchedActions) =>
- // cannot construct MergeIntoIcebergTable right away as MERGE operations require special resolution
- // that's why the condition and actions must be hidden from the regular resolution rules in Spark
- // see ResolveMergeIntoTableReferences for details
- val context = MergeIntoContext(cond, matchedActions, notMatchedActions)
- UnresolvedMergeIntoIcebergTable(aliasedTable, source, context)
- }
-
- object UnresolvedIcebergTable {
-
- def unapply(plan: LogicalPlan): Option[LogicalPlan] = {
- EliminateSubqueryAliases(plan) match {
- case UnresolvedRelation(multipartIdentifier, _, _) if isIcebergTable(multipartIdentifier) =>
- Some(plan)
- case _ =>
- None
- }
- }
-
- private def isIcebergTable(multipartIdent: Seq[String]): Boolean = {
- val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
- catalogAndIdentifier.catalog match {
- case tableCatalog: TableCatalog =>
- Try(tableCatalog.loadTable(catalogAndIdentifier.identifier))
- .map(isIcebergTable)
- .getOrElse(false)
-
- case _ =>
- false
- }
- }
-
- private def isIcebergTable(table: Table): Boolean = table match {
- case _: SparkTable => true
- case _ => false
- }
- }
-
- private def isIcebergCommand(sqlText: String): Boolean = {
- val normalized = sqlText.toLowerCase(Locale.ROOT).trim()
- // Strip simple SQL comments that terminate a line, e.g. comments starting with `--` .
- .replaceAll("--.*?\\n", " ")
- // Strip newlines.
- .replaceAll("\\s+", " ")
- // Strip comments of the form /* ... */. This must come after stripping newlines so that
- // comments that span multiple lines are caught.
- .replaceAll("/\\*.*?\\*/", " ")
- .trim()
- normalized.startsWith("call") || (
- normalized.startsWith("alter table") && (
- normalized.contains("add partition field") ||
- normalized.contains("drop partition field") ||
- normalized.contains("replace partition field") ||
- normalized.contains("write ordered by") ||
- normalized.contains("write locally ordered by") ||
- normalized.contains("write distributed by") ||
- normalized.contains("write unordered") ||
- normalized.contains("set identifier fields") ||
- normalized.contains("drop identifier fields") ||
- isSnapshotRefDdl(normalized)))
- }
-
- private def isSnapshotRefDdl(normalized: String): Boolean = {
- normalized.contains("create branch") ||
- normalized.contains("replace branch") ||
- normalized.contains("create tag") ||
- normalized.contains("replace tag") ||
- normalized.contains("drop branch") ||
- normalized.contains("drop tag")
- }
-
- protected def parse[T](command: String)(toResult: IcebergSqlExtensionsParser => T): T = {
- val lexer = new IcebergSqlExtensionsLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
- lexer.removeErrorListeners()
- lexer.addErrorListener(IcebergParseErrorListener)
-
- val tokenStream = new CommonTokenStream(lexer)
- val parser = new IcebergSqlExtensionsParser(tokenStream)
- parser.addParseListener(IcebergSqlExtensionsPostProcessor)
- parser.removeErrorListeners()
- parser.addErrorListener(IcebergParseErrorListener)
-
- try {
- try {
- // first, try parsing with potentially faster SLL mode
- parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
- toResult(parser)
- }
- catch {
- case _: ParseCancellationException =>
- // if we fail, parse with LL mode
- tokenStream.seek(0) // rewind input stream
- parser.reset()
-
- // Try Again.
- parser.getInterpreter.setPredictionMode(PredictionMode.LL)
- toResult(parser)
- }
- }
- catch {
- case e: IcebergParseException if e.command.isDefined =>
- throw e
- case e: IcebergParseException =>
- throw e.withCommand(command)
- case e: AnalysisException =>
- val position = Origin(e.line, e.startPosition)
- throw new IcebergParseException(Option(command), e.message, position, position)
- }
- }
-}
-
-object IcebergSparkSqlExtensionsParser {
- private val substitutorCtor: DynConstructors.Ctor[VariableSubstitution] =
- DynConstructors.builder()
- .impl(classOf[VariableSubstitution])
- .impl(classOf[VariableSubstitution], classOf[SQLConf])
- .build()
-}
-
-/* Copied from Apache Spark's to avoid dependency on Spark Internals */
-class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream {
- override def consume(): Unit = wrapped.consume
- override def getSourceName(): String = wrapped.getSourceName
- override def index(): Int = wrapped.index
- override def mark(): Int = wrapped.mark
- override def release(marker: Int): Unit = wrapped.release(marker)
- override def seek(where: Int): Unit = wrapped.seek(where)
- override def size(): Int = wrapped.size
-
- override def getText(interval: Interval): String = wrapped.getText(interval)
-
- // scalastyle:off
- override def LA(i: Int): Int = {
- val la = wrapped.LA(i)
- if (la == 0 || la == IntStream.EOF) la
- else Character.toUpperCase(la)
- }
- // scalastyle:on
-}
-
-/**
- * The post-processor validates and cleans up the parse tree during the parse process.
- */
-case object IcebergSqlExtensionsPostProcessor extends IcebergSqlExtensionsBaseListener {
-
- /** Remove the back ticks from an Identifier. */
- override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
- replaceTokenByIdentifier(ctx, 1) { token =>
- // Remove the double back ticks in the string.
- token.setText(token.getText.replace("``", "`"))
- token
- }
- }
-
- /** Treat non-reserved keywords as Identifiers. */
- override def exitNonReserved(ctx: NonReservedContext): Unit = {
- replaceTokenByIdentifier(ctx, 0)(identity)
- }
-
- private def replaceTokenByIdentifier(
- ctx: ParserRuleContext,
- stripMargins: Int)(
- f: CommonToken => CommonToken = identity): Unit = {
- val parent = ctx.getParent
- parent.removeLastChild()
- val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
- val newToken = new CommonToken(
- new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
- IcebergSqlExtensionsParser.IDENTIFIER,
- token.getChannel,
- token.getStartIndex + stripMargins,
- token.getStopIndex - stripMargins)
- parent.addChild(new TerminalNodeImpl(f(newToken)))
- }
-}
-
-/* Partially copied from Apache Spark's Parser to avoid dependency on Spark Internals */
-case object IcebergParseErrorListener extends BaseErrorListener {
- override def syntaxError(
- recognizer: Recognizer[_, _],
- offendingSymbol: scala.Any,
- line: Int,
- charPositionInLine: Int,
- msg: String,
- e: RecognitionException): Unit = {
- val (start, stop) = offendingSymbol match {
- case token: CommonToken =>
- val start = Origin(Some(line), Some(token.getCharPositionInLine))
- val length = token.getStopIndex - token.getStartIndex + 1
- val stop = Origin(Some(line), Some(token.getCharPositionInLine + length))
- (start, stop)
- case _ =>
- val start = Origin(Some(line), Some(charPositionInLine))
- (start, start)
- }
- throw new IcebergParseException(None, msg, start, stop)
- }
-}
-
-/**
- * Copied from Apache Spark
- * A [[ParseException]] is an [[AnalysisException]] that is thrown during the parse process. It
- * contains fields and an extended error message that make reporting and diagnosing errors easier.
- */
-class IcebergParseException(
- val command: Option[String],
- message: String,
- val start: Origin,
- val stop: Origin) extends AnalysisException(message, start.line, start.startPosition) {
-
- def this(message: String, ctx: ParserRuleContext) = {
- this(Option(IcebergParserUtils.command(ctx)),
- message,
- IcebergParserUtils.position(ctx.getStart),
- IcebergParserUtils.position(ctx.getStop))
- }
-
- override def getMessage: String = {
- val builder = new StringBuilder
- builder ++= "\n" ++= message
- start match {
- case Origin(Some(l), Some(p)) =>
- builder ++= s"(line $l, pos $p)\n"
- command.foreach { cmd =>
- val (above, below) = cmd.split("\n").splitAt(l)
- builder ++= "\n== SQL ==\n"
- above.foreach(builder ++= _ += '\n')
- builder ++= (0 until p).map(_ => "-").mkString("") ++= "^^^\n"
- below.foreach(builder ++= _ += '\n')
- }
- case _ =>
- command.foreach { cmd =>
- builder ++= "\n== SQL ==\n" ++= cmd
- }
- }
- builder.toString
- }
-
- def withCommand(cmd: String): IcebergParseException = {
- new IcebergParseException(Option(cmd), message, start, stop)
- }
-}
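
The parser above decides whether to handle a statement itself with a cheap textual pre-check: lowercase the SQL, strip -- line comments and /* */ block comments, collapse whitespace, then look for the extension keywords. A standalone version of that normalization, keeping only a couple of the keyword checks (the full list is in the deleted isIcebergCommand above):

import java.util.Locale

object IcebergCommandCheckSketch {
  def isIcebergCommand(sqlText: String): Boolean = {
    val normalized = sqlText.toLowerCase(Locale.ROOT).trim()
      .replaceAll("--.*?\\n", " ")    // line comments
      .replaceAll("\\s+", " ")        // newlines and repeated whitespace
      .replaceAll("/\\*.*?\\*/", " ") // block comments, after newlines are collapsed
      .trim()
    // only a subset of the checks from the removed parser is reproduced here
    normalized.startsWith("call") ||
      (normalized.startsWith("alter table") && normalized.contains("add partition field"))
  }

  def main(args: Array[String]): Unit = {
    assert(isIcebergCommand("CALL cat.system.some_procedure()")) // placeholder procedure name
    assert(!isIcebergCommand("SELECT * FROM db.tbl"))
  }
}
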
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala
deleted file mode 100644
index 2e438de2b8cd..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSqlExtensionsAstBuilder.scala
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.parser.extensions
-
-import java.util.Locale
-import java.util.concurrent.TimeUnit
-import org.antlr.v4.runtime._
-import org.antlr.v4.runtime.misc.Interval
-import org.antlr.v4.runtime.tree.ParseTree
-import org.antlr.v4.runtime.tree.TerminalNode
-import org.apache.iceberg.DistributionMode
-import org.apache.iceberg.NullOrder
-import org.apache.iceberg.SortDirection
-import org.apache.iceberg.expressions.Term
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergParserUtils.withOrigin
-import org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser._
-import org.apache.spark.sql.catalyst.plans.logical.AddPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.BranchOptions
-import org.apache.spark.sql.catalyst.plans.logical.CallArgument
-import org.apache.spark.sql.catalyst.plans.logical.CallStatement
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceBranch
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceTag
-import org.apache.spark.sql.catalyst.plans.logical.DropBranch
-import org.apache.spark.sql.catalyst.plans.logical.DropIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.DropPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.DropTag
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.NamedArgument
-import org.apache.spark.sql.catalyst.plans.logical.PositionalArgument
-import org.apache.spark.sql.catalyst.plans.logical.ReplacePartitionField
-import org.apache.spark.sql.catalyst.plans.logical.SetIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.SetWriteDistributionAndOrdering
-import org.apache.spark.sql.catalyst.plans.logical.TagOptions
-import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.connector.expressions
-import org.apache.spark.sql.connector.expressions.ApplyTransform
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.LiteralValue
-import org.apache.spark.sql.connector.expressions.Transform
-import scala.jdk.CollectionConverters._
-
-class IcebergSqlExtensionsAstBuilder(delegate: ParserInterface) extends IcebergSqlExtensionsBaseVisitor[AnyRef] {
-
- private def toBuffer[T](list: java.util.List[T]): scala.collection.mutable.Buffer[T] = list.asScala
- private def toSeq[T](list: java.util.List[T]): Seq[T] = toBuffer(list).toSeq
-
- /**
- * Create a [[CallStatement]] for a stored procedure call.
- */
- override def visitCall(ctx: CallContext): CallStatement = withOrigin(ctx) {
- val name = toSeq(ctx.multipartIdentifier.parts).map(_.getText)
- val args = toSeq(ctx.callArgument).map(typedVisit[CallArgument])
- CallStatement(name, args)
- }
-
- /**
- * Create an ADD PARTITION FIELD logical command.
- */
- override def visitAddPartitionField(ctx: AddPartitionFieldContext): AddPartitionField = withOrigin(ctx) {
- AddPartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform),
- Option(ctx.name).map(_.getText))
- }
-
- /**
- * Create a DROP PARTITION FIELD logical command.
- */
- override def visitDropPartitionField(ctx: DropPartitionFieldContext): DropPartitionField = withOrigin(ctx) {
- DropPartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform))
- }
-
- /**
- * Create a CREATE OR REPLACE BRANCH logical command.
- */
- override def visitCreateOrReplaceBranch(ctx: CreateOrReplaceBranchContext): CreateOrReplaceBranch = withOrigin(ctx) {
- val createOrReplaceBranchClause = ctx.createReplaceBranchClause()
-
- val branchName = createOrReplaceBranchClause.identifier()
- val branchOptionsContext = Option(createOrReplaceBranchClause.branchOptions())
- val snapshotId = branchOptionsContext.flatMap(branchOptions => Option(branchOptions.snapshotId()))
- .map(_.getText.toLong)
- val snapshotRetention = branchOptionsContext.flatMap(branchOptions => Option(branchOptions.snapshotRetention()))
- val minSnapshotsToKeep = snapshotRetention.flatMap(retention => Option(retention.minSnapshotsToKeep()))
- .map(minSnapshots => minSnapshots.number().getText.toLong)
- val maxSnapshotAgeMs = snapshotRetention
- .flatMap(retention => Option(retention.maxSnapshotAge()))
- .map(retention => TimeUnit.valueOf(retention.timeUnit().getText.toUpperCase(Locale.ENGLISH))
- .toMillis(retention.number().getText.toLong))
- val branchRetention = branchOptionsContext.flatMap(branchOptions => Option(branchOptions.refRetain()))
- val branchRefAgeMs = branchRetention.map(retain =>
- TimeUnit.valueOf(retain.timeUnit().getText.toUpperCase(Locale.ENGLISH)).toMillis(retain.number().getText.toLong))
- val create = createOrReplaceBranchClause.CREATE() != null
- val replace = createOrReplaceBranchClause.REPLACE() != null
- val ifNotExists = createOrReplaceBranchClause.EXISTS() != null
-
- val branchOptions = BranchOptions(
- snapshotId,
- minSnapshotsToKeep,
- maxSnapshotAgeMs,
- branchRefAgeMs
- )
-
- CreateOrReplaceBranch(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- branchName.getText,
- branchOptions,
- create,
- replace,
- ifNotExists)
- }
-
- /**
- * Create a CREATE OR REPLACE TAG logical command.
- */
- override def visitCreateOrReplaceTag(ctx: CreateOrReplaceTagContext): CreateOrReplaceTag = withOrigin(ctx) {
- val createTagClause = ctx.createReplaceTagClause()
-
- val tagName = createTagClause.identifier().getText
-
- val tagOptionsContext = Option(createTagClause.tagOptions())
- val snapshotId = tagOptionsContext.flatMap(tagOptions => Option(tagOptions.snapshotId()))
- .map(_.getText.toLong)
- val tagRetain = tagOptionsContext.flatMap(tagOptions => Option(tagOptions.refRetain()))
- val tagRefAgeMs = tagRetain.map(retain =>
- TimeUnit.valueOf(retain.timeUnit().getText.toUpperCase(Locale.ENGLISH)).toMillis(retain.number().getText.toLong))
- val tagOptions = TagOptions(
- snapshotId,
- tagRefAgeMs
- )
-
- val create = createTagClause.CREATE() != null
- val replace = createTagClause.REPLACE() != null
- val ifNotExists = createTagClause.EXISTS() != null
-
- CreateOrReplaceTag(typedVisit[Seq[String]](ctx.multipartIdentifier),
- tagName,
- tagOptions,
- create,
- replace,
- ifNotExists)
- }
-
- /**
- * Create a DROP BRANCH logical command.
- */
- override def visitDropBranch(ctx: DropBranchContext): DropBranch = withOrigin(ctx) {
- DropBranch(typedVisit[Seq[String]](ctx.multipartIdentifier), ctx.identifier().getText, ctx.EXISTS() != null)
- }
-
- /**
- * Create a DROP TAG logical command.
- */
- override def visitDropTag(ctx: DropTagContext): DropTag = withOrigin(ctx) {
- DropTag(typedVisit[Seq[String]](ctx.multipartIdentifier), ctx.identifier().getText, ctx.EXISTS() != null)
- }
-
- /**
- * Create a REPLACE PARTITION FIELD logical command.
- */
- override def visitReplacePartitionField(ctx: ReplacePartitionFieldContext): ReplacePartitionField = withOrigin(ctx) {
- ReplacePartitionField(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- typedVisit[Transform](ctx.transform(0)),
- typedVisit[Transform](ctx.transform(1)),
- Option(ctx.name).map(_.getText))
- }
-
- /**
- * Create a SET IDENTIFIER FIELDS logical command.
- */
- override def visitSetIdentifierFields(ctx: SetIdentifierFieldsContext): SetIdentifierFields = withOrigin(ctx) {
- SetIdentifierFields(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- toSeq(ctx.fieldList.fields).map(_.getText))
- }
-
- /**
- * Create a DROP IDENTIFIER FIELDS logical command.
- */
- override def visitDropIdentifierFields(ctx: DropIdentifierFieldsContext): DropIdentifierFields = withOrigin(ctx) {
- DropIdentifierFields(
- typedVisit[Seq[String]](ctx.multipartIdentifier),
- toSeq(ctx.fieldList.fields).map(_.getText))
- }
-
- /**
- * Create a [[SetWriteDistributionAndOrdering]] for changing the write distribution and ordering.
- */
- override def visitSetWriteDistributionAndOrdering(
- ctx: SetWriteDistributionAndOrderingContext): SetWriteDistributionAndOrdering = {
-
- val tableName = typedVisit[Seq[String]](ctx.multipartIdentifier)
-
- val (distributionSpec, orderingSpec) = toDistributionAndOrderingSpec(ctx.writeSpec)
-
- if (distributionSpec == null && orderingSpec == null) {
- throw new AnalysisException(
- "ALTER TABLE has no changes: missing both distribution and ordering clauses")
- }
-
- val distributionMode = if (distributionSpec != null) {
- DistributionMode.HASH
- } else if (orderingSpec.UNORDERED != null || orderingSpec.LOCALLY != null) {
- DistributionMode.NONE
- } else {
- DistributionMode.RANGE
- }
-
- val ordering = if (orderingSpec != null && orderingSpec.order != null) {
- toSeq(orderingSpec.order.fields).map(typedVisit[(Term, SortDirection, NullOrder)])
- } else {
- Seq.empty
- }
-
- SetWriteDistributionAndOrdering(tableName, distributionMode, ordering)
- }
-
- private def toDistributionAndOrderingSpec(
- writeSpec: WriteSpecContext): (WriteDistributionSpecContext, WriteOrderingSpecContext) = {
-
- if (writeSpec.writeDistributionSpec.size > 1) {
- throw new AnalysisException("ALTER TABLE contains multiple distribution clauses")
- }
-
- if (writeSpec.writeOrderingSpec.size > 1) {
- throw new AnalysisException("ALTER TABLE contains multiple ordering clauses")
- }
-
- val distributionSpec = toBuffer(writeSpec.writeDistributionSpec).headOption.orNull
- val orderingSpec = toBuffer(writeSpec.writeOrderingSpec).headOption.orNull
-
- (distributionSpec, orderingSpec)
- }
-
- /**
- * Create an order field.
- */
- override def visitOrderField(ctx: OrderFieldContext): (Term, SortDirection, NullOrder) = {
- val term = Spark3Util.toIcebergTerm(typedVisit[Transform](ctx.transform))
- val direction = Option(ctx.ASC).map(_ => SortDirection.ASC)
- .orElse(Option(ctx.DESC).map(_ => SortDirection.DESC))
- .getOrElse(SortDirection.ASC)
- val nullOrder = Option(ctx.FIRST).map(_ => NullOrder.NULLS_FIRST)
- .orElse(Option(ctx.LAST).map(_ => NullOrder.NULLS_LAST))
- .getOrElse(if (direction == SortDirection.ASC) NullOrder.NULLS_FIRST else NullOrder.NULLS_LAST)
- (term, direction, nullOrder)
- }
-
- /**
- * Create an IdentityTransform for a column reference.
- */
- override def visitIdentityTransform(ctx: IdentityTransformContext): Transform = withOrigin(ctx) {
- IdentityTransform(FieldReference(typedVisit[Seq[String]](ctx.multipartIdentifier())))
- }
-
- /**
- * Create a named Transform from argument expressions.
- */
- override def visitApplyTransform(ctx: ApplyTransformContext): Transform = withOrigin(ctx) {
- val args = toSeq(ctx.arguments).map(typedVisit[expressions.Expression])
- ApplyTransform(ctx.transformName.getText, args)
- }
-
- /**
- * Create a transform argument from a column reference or a constant.
- */
- override def visitTransformArgument(ctx: TransformArgumentContext): expressions.Expression = withOrigin(ctx) {
- val reference = Option(ctx.multipartIdentifier())
- .map(typedVisit[Seq[String]])
- .map(FieldReference(_))
- val literal = Option(ctx.constant)
- .map(visitConstant)
- .map(lit => LiteralValue(lit.value, lit.dataType))
- reference.orElse(literal)
- .getOrElse(throw new IcebergParseException(s"Invalid transform argument", ctx))
- }
-
- /**
- * Return a multi-part identifier as Seq[String].
- */
- override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = withOrigin(ctx) {
- toSeq(ctx.parts).map(_.getText)
- }
-
- override def visitSingleOrder(ctx: SingleOrderContext): Seq[(Term, SortDirection, NullOrder)] = withOrigin(ctx) {
- toSeq(ctx.order.fields).map(typedVisit[(Term, SortDirection, NullOrder)])
- }
-
- /**
- * Create a positional argument in a stored procedure call.
- */
- override def visitPositionalArgument(ctx: PositionalArgumentContext): CallArgument = withOrigin(ctx) {
- val expr = typedVisit[Expression](ctx.expression)
- PositionalArgument(expr)
- }
-
- /**
- * Create a named argument in a stored procedure call.
- */
- override def visitNamedArgument(ctx: NamedArgumentContext): CallArgument = withOrigin(ctx) {
- val name = ctx.identifier.getText
- val expr = typedVisit[Expression](ctx.expression)
- NamedArgument(name, expr)
- }
-
- override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) {
- visit(ctx.statement).asInstanceOf[LogicalPlan]
- }
-
- def visitConstant(ctx: ConstantContext): Literal = {
- delegate.parseExpression(ctx.getText).asInstanceOf[Literal]
- }
-
- override def visitExpression(ctx: ExpressionContext): Expression = {
- // reconstruct the SQL string and parse it using the main Spark parser
- // while we can avoid the logic to build Spark expressions, we still have to parse them
- // we cannot call ctx.getText directly since it will not render spaces correctly
- // that's why we need to recurse down the tree in reconstructSqlString
- val sqlString = reconstructSqlString(ctx)
- delegate.parseExpression(sqlString)
- }
-
- private def reconstructSqlString(ctx: ParserRuleContext): String = {
- toBuffer(ctx.children).map {
- case c: ParserRuleContext => reconstructSqlString(c)
- case t: TerminalNode => t.getText
- }.mkString(" ")
- }
-
- private def typedVisit[T](ctx: ParseTree): T = {
- ctx.accept(this).asInstanceOf[T]
- }
-}
-
-/* Partially copied from Apache Spark's Parser to avoid dependency on Spark Internals */
-object IcebergParserUtils {
-
- private[sql] def withOrigin[T](ctx: ParserRuleContext)(f: => T): T = {
- val current = CurrentOrigin.get
- CurrentOrigin.set(position(ctx.getStart))
- try {
- f
- } finally {
- CurrentOrigin.set(current)
- }
- }
-
- private[sql] def position(token: Token): Origin = {
- val opt = Option(token)
- Origin(opt.map(_.getLine), opt.map(_.getCharPositionInLine))
- }
-
- /** Get the command which created the token. */
- private[sql] def command(ctx: ParserRuleContext): String = {
- val stream = ctx.getStart.getInputStream
- stream.getText(Interval.of(0, stream.size() - 1))
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala
deleted file mode 100644
index 4cc7a7bf2f96..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/planning/RewrittenRowLevelCommand.scala
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.planning
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
-
-/**
- * An extractor for operations such as DELETE and MERGE that require rewriting data.
- *
- * This class extracts the following entities:
- * - the row-level command (such as DeleteFromIcebergTable);
- * - the read relation in the rewrite plan that can be either DataSourceV2Relation or
- * DataSourceV2ScanRelation depending on whether the planning has already happened;
- * - the current rewrite plan.
- */
-object RewrittenRowLevelCommand {
- type ReturnType = (RowLevelCommand, LogicalPlan, LogicalPlan)
-
- def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
- case c: RowLevelCommand if c.rewritePlan.nonEmpty =>
- val rewritePlan = c.rewritePlan.get
-
- // both ReplaceData and WriteDelta reference a write relation
- // but the corresponding read relation should be at the bottom of the write plan
- // both the write and read relations will share the same RowLevelOperationTable object
- // that's why it is safe to use reference equality to find the needed read relation
-
- val allowScanDuplication = c match {
- // group-based updates that rely on the union approach may have multiple identical scans
- case _: UpdateIcebergTable if rewritePlan.isInstanceOf[ReplaceData] => true
- case _ => false
- }
-
- rewritePlan match {
- case rd @ ReplaceData(DataSourceV2Relation(table, _, _, _, _), query, _, _) =>
- val readRelation = findReadRelation(table, query, allowScanDuplication)
- readRelation.map((c, _, rd))
- case wd @ WriteDelta(DataSourceV2Relation(table, _, _, _, _), query, _, _, _) =>
- val readRelation = findReadRelation(table, query, allowScanDuplication)
- readRelation.map((c, _, wd))
- case _ =>
- None
- }
-
- case _ =>
- None
- }
-
- private def findReadRelation(
- table: Table,
- plan: LogicalPlan,
- allowScanDuplication: Boolean): Option[LogicalPlan] = {
-
- val readRelations = plan.collect {
- case r: DataSourceV2Relation if r.table eq table => r
- case r: DataSourceV2ScanRelation if r.relation.table eq table => r
- }
-
- // in some cases, the optimizer replaces the v2 read relation with a local relation
- // for example, there is no reason to query the table if the condition is always false
- // that's why it is valid not to find the corresponding v2 read relation
-
- readRelations match {
- case relations if relations.isEmpty =>
- None
-
- case Seq(relation) =>
- Some(relation)
-
- case Seq(relation1: DataSourceV2Relation, relation2: DataSourceV2Relation)
- if allowScanDuplication && (relation1.table eq relation2.table) =>
- Some(relation1)
-
- case Seq(relation1: DataSourceV2ScanRelation, relation2: DataSourceV2ScanRelation)
- if allowScanDuplication && (relation1.scan eq relation2.scan) =>
- Some(relation1)
-
- case Seq(relation1, relation2) if allowScanDuplication =>
- throw new AnalysisException(s"Row-level read relations don't match: $relation1, $relation2")
-
- case relations if allowScanDuplication =>
- throw new AnalysisException(s"Expected up to two row-level read relations: $relations")
-
- case relations =>
- throw new AnalysisException(s"Expected only one row-level read relation: $relations")
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala
deleted file mode 100644
index e8b1b2941161..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AddPartitionField.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class AddPartitionField(table: Seq[String], transform: Transform, name: Option[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"AddPartitionField ${table.quoted} ${name.map(n => s"$n=").getOrElse("")}${transform.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala
deleted file mode 100644
index 2041a983700a..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/BranchOptions.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-case class BranchOptions(snapshotId: Option[Long], numSnapshots: Option[Long],
- snapshotRetain: Option[Long], snapshotRefRetain: Option[Long])
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala
deleted file mode 100644
index 9616dae5a8d3..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.connector.iceberg.catalog.Procedure
-
-case class Call(procedure: Procedure, args: Seq[Expression]) extends LeafCommand {
- override lazy val output: Seq[Attribute] = procedure.outputType.toAttributes
-
- override def simpleString(maxFields: Int): String = {
- s"Call${truncatedString(output.toSeq, "[", ", ", "]", maxFields)} ${procedure.description}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala
deleted file mode 100644
index b7981a3c7a0d..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceBranch.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class CreateOrReplaceBranch(
- table: Seq[String],
- branch: String,
- branchOptions: BranchOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplaceBranch branch: ${branch} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala
deleted file mode 100644
index 6e7db84a90fb..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CreateOrReplaceTag.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class CreateOrReplaceTag(
- table: Seq[String],
- tag: String,
- tagOptions: TagOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplaceTag tag: ${tag} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeleteFromIcebergTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeleteFromIcebergTable.scala
deleted file mode 100644
index d1268e416c50..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DeleteFromIcebergTable.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-case class DeleteFromIcebergTable(
- table: LogicalPlan,
- condition: Option[Expression],
- rewritePlan: Option[LogicalPlan] = None) extends RowLevelCommand {
-
- override def children: Seq[LogicalPlan] = if (rewritePlan.isDefined) {
- table :: rewritePlan.get :: Nil
- } else {
- table :: Nil
- }
-
- override def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand = {
- copy(rewritePlan = Some(newRewritePlan))
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): DeleteFromIcebergTable = {
- if (newChildren.size == 1) {
- copy(table = newChildren.head, rewritePlan = None)
- } else {
- require(newChildren.size == 2, "DeleteFromIcebergTable expects either one or two children")
- val Seq(newTable, newRewritePlan) = newChildren.take(2)
- copy(table = newTable, rewritePlan = Some(newRewritePlan))
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala
deleted file mode 100644
index bee0b0fae688..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropBranch.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropBranch(table: Seq[String], branch: String, ifExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropBranch branch: ${branch} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala
deleted file mode 100644
index 29dd686a0fba..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropIdentifierFields.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropIdentifierFields(
- table: Seq[String],
- fields: Seq[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropIdentifierFields ${table.quoted} (${fields.quoted})"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala
deleted file mode 100644
index fb1451324182..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropPartitionField.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class DropPartitionField(table: Seq[String], transform: Transform) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropPartitionField ${table.quoted} ${transform.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala
deleted file mode 100644
index 7e4b38e74d2f..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DropTag.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class DropTag(table: Seq[String], tag: String, ifExists: Boolean) extends LeafCommand {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"DropTag tag: ${tag} for table: ${table.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala
deleted file mode 100644
index 8f84851dcda2..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeIntoIcebergTable.scala
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-case class MergeIntoIcebergTable(
- targetTable: LogicalPlan,
- sourceTable: LogicalPlan,
- mergeCondition: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction],
- rewritePlan: Option[LogicalPlan] = None) extends RowLevelCommand {
-
- lazy val aligned: Boolean = {
- val matchedActionsAligned = matchedActions.forall {
- case UpdateAction(_, assignments) =>
- AssignmentUtils.aligned(targetTable, assignments)
- case _: DeleteAction =>
- true
- case _ =>
- false
- }
-
- val notMatchedActionsAligned = notMatchedActions.forall {
- case InsertAction(_, assignments) =>
- AssignmentUtils.aligned(targetTable, assignments)
- case _ =>
- false
- }
-
- matchedActionsAligned && notMatchedActionsAligned
- }
-
- def condition: Option[Expression] = Some(mergeCondition)
-
- override def children: Seq[LogicalPlan] = if (rewritePlan.isDefined) {
- targetTable :: sourceTable :: rewritePlan.get :: Nil
- } else {
- targetTable :: sourceTable :: Nil
- }
-
- override def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand = {
- copy(rewritePlan = Some(newRewritePlan))
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): MergeIntoIcebergTable = {
-
- newChildren match {
- case Seq(newTarget, newSource) =>
- copy(targetTable = newTarget, sourceTable = newSource, rewritePlan = None)
- case Seq(newTarget, newSource, newRewritePlan) =>
- copy(targetTable = newTarget, sourceTable = newSource, rewritePlan = Some(newRewritePlan))
- case _ =>
- throw new IllegalArgumentException("MergeIntoIcebergTable expects either two or three children")
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala
deleted file mode 100644
index 3607194fe8c8..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/MergeRows.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.util.truncatedString
-
-case class MergeRows(
- isSourceRowPresent: Expression,
- isTargetRowPresent: Expression,
- matchedConditions: Seq[Expression],
- matchedOutputs: Seq[Seq[Expression]],
- notMatchedConditions: Seq[Expression],
- notMatchedOutputs: Seq[Seq[Expression]],
- targetOutput: Seq[Expression],
- rowIdAttrs: Seq[Attribute],
- performCardinalityCheck: Boolean,
- emitNotMatchedTargetRows: Boolean,
- output: Seq[Attribute],
- child: LogicalPlan) extends UnaryNode {
-
- require(targetOutput.nonEmpty || !emitNotMatchedTargetRows)
-
- override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- override lazy val references: AttributeSet = child.outputSet
-
- override def simpleString(maxFields: Int): String = {
- s"MergeRows${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
- copy(child = newChild)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala
deleted file mode 100644
index c21df71f069d..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NoStatsUnaryNode.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-
-case class NoStatsUnaryNode(child: LogicalPlan) extends UnaryNode {
- override def output: Seq[Attribute] = child.output
- override def stats: Statistics = Statistics(Long.MaxValue)
-
- override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
- copy(child = newChild)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceData.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceData.scala
deleted file mode 100644
index 3bf726ffb719..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplaceData.scala
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.write.Write
-import org.apache.spark.sql.types.DataType
-
-/**
- * Replace data in an existing table.
- */
-case class ReplaceData(
- table: NamedRelation,
- query: LogicalPlan,
- originalTable: NamedRelation,
- write: Option[Write] = None) extends V2WriteCommandLike {
-
- override lazy val references: AttributeSet = query.outputSet
- override lazy val stringArgs: Iterator[Any] = Iterator(table, query, write)
-
- // the incoming query may include metadata columns
- lazy val dataInput: Seq[Attribute] = {
- val tableAttrNames = table.output.map(_.name)
- query.output.filter(attr => tableAttrNames.exists(conf.resolver(_, attr.name)))
- }
-
- override def outputResolved: Boolean = {
- assert(table.resolved && query.resolved,
- "`outputResolved` can only be called when `table` and `query` are both resolved.")
-
- // take into account only incoming data columns and ignore metadata columns in the query
- // they will be discarded after the logical write is built in the optimizer
- // metadata columns may be needed to request a correct distribution or ordering
- // but are not passed back to the data source during writes
-
- table.skipSchemaResolution || (dataInput.size == table.output.size &&
- dataInput.zip(table.output).forall { case (inAttr, outAttr) =>
- val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType)
- // names and types must match, nullability must be compatible
- inAttr.name == outAttr.name &&
- DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) &&
- (outAttr.nullable || !inAttr.nullable)
- })
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): ReplaceData = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala
deleted file mode 100644
index 8c660c6f37b1..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ReplacePartitionField.scala
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class ReplacePartitionField(
- table: Seq[String],
- transformFrom: Transform,
- transformTo: Transform,
- name: Option[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"ReplacePartitionField ${table.quoted} ${transformFrom.describe} " +
- s"with ${name.map(n => s"$n=").getOrElse("")}${transformTo.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala
deleted file mode 100644
index 837ee963bcea..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RowLevelCommand.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-trait RowLevelCommand extends Command with SupportsSubquery {
- def condition: Option[Expression]
- def rewritePlan: Option[LogicalPlan]
- def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala
deleted file mode 100644
index a5fa28a617e7..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SetIdentifierFields.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class SetIdentifierFields(
- table: Seq[String],
- fields: Seq[String]) extends LeafCommand {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override def simpleString(maxFields: Int): String = {
- s"SetIdentifierFields ${table.quoted} (${fields.quoted})"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala
deleted file mode 100644
index 85e3b95f4aba..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/TagOptions.scala
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-case class TagOptions(snapshotId: Option[Long], snapshotRefRetain: Option[Long])
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala
deleted file mode 100644
index 895aa733ff20..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UnresolvedMergeIntoIcebergTable.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-/**
- * A node that hides the MERGE condition and actions from regular Spark resolution.
- */
-case class UnresolvedMergeIntoIcebergTable(
- targetTable: LogicalPlan,
- sourceTable: LogicalPlan,
- context: MergeIntoContext) extends BinaryCommand {
-
- def duplicateResolved: Boolean = targetTable.outputSet.intersect(sourceTable.outputSet).isEmpty
-
- override def left: LogicalPlan = targetTable
- override def right: LogicalPlan = sourceTable
-
- override protected def withNewChildrenInternal(newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = {
- copy(targetTable = newLeft, sourceTable = newRight)
- }
-}
-
-case class MergeIntoContext(
- mergeCondition: Expression,
- matchedActions: Seq[MergeAction],
- notMatchedActions: Seq[MergeAction])
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala
deleted file mode 100644
index 790eb9380e3d..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/UpdateIcebergTable.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.AssignmentUtils
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-case class UpdateIcebergTable(
- table: LogicalPlan,
- assignments: Seq[Assignment],
- condition: Option[Expression],
- rewritePlan: Option[LogicalPlan] = None) extends RowLevelCommand {
-
- lazy val aligned: Boolean = AssignmentUtils.aligned(table, assignments)
-
- override def children: Seq[LogicalPlan] = if (rewritePlan.isDefined) {
- table :: rewritePlan.get :: Nil
- } else {
- table :: Nil
- }
-
- override def withNewRewritePlan(newRewritePlan: LogicalPlan): RowLevelCommand = {
- copy(rewritePlan = Some(newRewritePlan))
- }
-
- override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[LogicalPlan]): UpdateIcebergTable = {
- if (newChildren.size == 1) {
- copy(table = newChildren.head, rewritePlan = None)
- } else {
- require(newChildren.size == 2, "UpdateTable expects either one or two children")
- val Seq(newTable, newRewritePlan) = newChildren.take(2)
- copy(table = newTable, rewritePlan = Some(newRewritePlan))
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala
deleted file mode 100644
index 9192d74b7caf..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/V2WriteCommandLike.scala
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-
-// a node similar to V2WriteCommand in Spark but does not extend Command
-// as ReplaceData and WriteDelta that extend this trait are nested within other commands
-trait V2WriteCommandLike extends UnaryNode {
- def table: NamedRelation
- def query: LogicalPlan
- def outputResolved: Boolean
-
- override lazy val resolved: Boolean = table.resolved && query.resolved && outputResolved
-
- override def child: LogicalPlan = query
- override def output: Seq[Attribute] = Seq.empty
- override def producedAttributes: AttributeSet = outputSet
- // Commands are eagerly executed. They will be converted to LocalRelation after the DataFrame
- // is created. That said, the statistics of a command is useless. Here we just return a dummy
- // statistics to avoid unnecessary statistics calculation of command's children.
- override def stats: Statistics = Statistics.DUMMY
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteDelta.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteDelta.scala
deleted file mode 100644
index 534427cc0410..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/WriteDelta.scala
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.analysis.NamedRelation
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.ExtendedV2ExpressionUtils
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils.OPERATION_COLUMN
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.iceberg.write.DeltaWrite
-import org.apache.spark.sql.connector.iceberg.write.SupportsDelta
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.types.DataType
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.types.StructField
-
-/**
- * Writes a delta of rows to an existing table.
- */
-case class WriteDelta(
- table: NamedRelation,
- query: LogicalPlan,
- originalTable: NamedRelation,
- projections: WriteDeltaProjections,
- write: Option[DeltaWrite] = None) extends V2WriteCommandLike {
-
- override protected lazy val stringArgs: Iterator[Any] = Iterator(table, query, write)
-
- private def operationResolved: Boolean = {
- val attr = query.output.head
- attr.name == OPERATION_COLUMN && attr.dataType == IntegerType && !attr.nullable
- }
-
- private def operation: SupportsDelta = {
- EliminateSubqueryAliases(table) match {
- case DataSourceV2Relation(RowLevelOperationTable(_, operation), _, _, _, _) =>
- operation match {
- case supportsDelta: SupportsDelta =>
- supportsDelta
- case _ =>
- throw new AnalysisException(s"Operation $operation is not a delta operation")
- }
- case _ =>
- throw new AnalysisException(s"Cannot retrieve row-level operation from $table")
- }
- }
-
- private def rowAttrsResolved: Boolean = {
- table.skipSchemaResolution || (projections.rowProjection match {
- case Some(projection) =>
- table.output.size == projection.schema.size &&
- projection.schema.zip(table.output).forall { case (field, outAttr) =>
- isCompatible(field, outAttr)
- }
- case None =>
- true
- })
- }
-
- private def rowIdAttrsResolved: Boolean = {
- val rowIdAttrs = ExtendedV2ExpressionUtils.resolveRefs[AttributeReference](
- operation.rowId.toSeq,
- originalTable)
-
- projections.rowIdProjection.schema.forall { field =>
- rowIdAttrs.exists(rowIdAttr => isCompatible(field, rowIdAttr))
- }
- }
-
- private def metadataAttrsResolved: Boolean = {
- projections.metadataProjection match {
- case Some(projection) =>
- val metadataAttrs = ExtendedV2ExpressionUtils.resolveRefs[AttributeReference](
- operation.requiredMetadataAttributes.toSeq,
- originalTable)
-
- projection.schema.forall { field =>
- metadataAttrs.exists(metadataAttr => isCompatible(field, metadataAttr))
- }
- case None =>
- true
- }
- }
-
- private def isCompatible(projectionField: StructField, outAttr: NamedExpression): Boolean = {
- val inType = CharVarcharUtils.getRawType(projectionField.metadata).getOrElse(outAttr.dataType)
- val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType)
- // names and types must match, nullability must be compatible
- projectionField.name == outAttr.name &&
- DataType.equalsIgnoreCompatibleNullability(inType, outType) &&
- (outAttr.nullable || !projectionField.nullable)
- }
-
- override def outputResolved: Boolean = {
- assert(table.resolved && query.resolved,
- "`outputResolved` can only be called when `table` and `query` are both resolved.")
-
- operationResolved && rowAttrsResolved && rowIdAttrsResolved && metadataAttrsResolved
- }
-
- override protected def withNewChildInternal(newChild: LogicalPlan): WriteDelta = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
deleted file mode 100644
index be15f32bc1b8..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.plans.logical
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-
-/**
- * A CALL statement, as parsed from SQL.
- */
-case class CallStatement(name: Seq[String], args: Seq[CallArgument]) extends LeafParsedStatement
-
-/**
- * An argument in a CALL statement.
- */
-sealed trait CallArgument {
- def expr: Expression
-}
-
-/**
- * An argument in a CALL statement identified by name.
- */
-case class NamedArgument(name: String, expr: Expression) extends CallArgument
-
-/**
- * An argument in a CALL statement identified by position.
- */
-case class PositionalArgument(expr: Expression) extends CallArgument
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/RowDeltaUtils.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/RowDeltaUtils.scala
deleted file mode 100644
index 9f8f07b77db2..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/RowDeltaUtils.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-object RowDeltaUtils {
- final val OPERATION_COLUMN: String = "__row_operation"
- final val DELETE_OPERATION: Int = 1
- final val UPDATE_OPERATION: Int = 2
- final val INSERT_OPERATION: Int = 3
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/WriteDeltaProjections.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/WriteDeltaProjections.scala
deleted file mode 100644
index e4e8b147d139..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/util/WriteDeltaProjections.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import org.apache.spark.sql.catalyst.ProjectingInternalRow
-
-case class WriteDeltaProjections(
- rowProjection: Option[ProjectingInternalRow],
- rowIdProjection: ProjectingInternalRow,
- metadataProjection: Option[ProjectingInternalRow])
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/expressions/TruncateTransform.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/expressions/TruncateTransform.scala
deleted file mode 100644
index 2a3269e2db1d..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/expressions/TruncateTransform.scala
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.connector.expressions
-
-import org.apache.spark.sql.types.IntegerType
-
-private[sql] object TruncateTransform {
- def unapply(expr: Expression): Option[(Int, FieldReference)] = expr match {
- case transform: Transform =>
- transform match {
- case NamedTransform("truncate", Seq(Ref(seq: Seq[String]), Lit(value: Int, IntegerType))) =>
- Some((value, FieldReference(seq)))
- case NamedTransform("truncate", Seq(Lit(value: Int, IntegerType), Ref(seq: Seq[String]))) =>
- Some((value, FieldReference(seq)))
- case _ =>
- None
- }
- case _ =>
- None
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/ExtendedLogicalWriteInfoImpl.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/ExtendedLogicalWriteInfoImpl.scala
deleted file mode 100644
index 0ae10ada5a39..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/ExtendedLogicalWriteInfoImpl.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.connector.write
-
-import org.apache.spark.sql.connector.iceberg.write.ExtendedLogicalWriteInfo
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-
-private[sql] case class ExtendedLogicalWriteInfoImpl(
- queryId: String,
- schema: StructType,
- options: CaseInsensitiveStringMap,
- rowIdSchema: StructType = null,
- metadataSchema: StructType = null) extends ExtendedLogicalWriteInfo
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala
deleted file mode 100644
index fca42feaed51..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.connector.write
-
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperationInfo
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-
-case class RowLevelOperationInfoImpl(
- command: Command,
- options: CaseInsensitiveStringMap) extends RowLevelOperationInfo
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala
deleted file mode 100644
index b6cca0af15bb..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.connector.write
-
-import java.util
-import org.apache.spark.sql.connector.catalog.SupportsRead
-import org.apache.spark.sql.connector.catalog.SupportsWrite
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.catalog.TableCapability
-import org.apache.spark.sql.connector.iceberg.catalog.SupportsRowLevelOperations
-import org.apache.spark.sql.connector.iceberg.write.ExtendedLogicalWriteInfo
-import org.apache.spark.sql.connector.iceberg.write.RowLevelOperation
-import org.apache.spark.sql.connector.read.ScanBuilder
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.util.CaseInsensitiveStringMap
-
-/**
- * An internal v2 table implementation that wraps the original table during DELETE, UPDATE,
- * MERGE operations.
- */
-case class RowLevelOperationTable(
- table: Table with SupportsRowLevelOperations,
- operation: RowLevelOperation) extends Table with SupportsRead with SupportsWrite {
-
- override def name: String = table.name
- override def schema: StructType = table.schema
- override def capabilities: util.Set[TableCapability] = table.capabilities
- override def toString: String = table.toString
-
- override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
- operation.newScanBuilder(options)
- }
-
- override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
- operation.newWriteBuilder(info.asInstanceOf[ExtendedLogicalWriteInfo])
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala
deleted file mode 100644
index 55f327f7e45e..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AddPartitionFieldExec.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class AddPartitionFieldExec(
- catalog: TableCatalog,
- ident: Identifier,
- transform: Transform,
- name: Option[String]) extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- iceberg.table.updateSpec()
- .addField(name.orNull, Spark3Util.toIcebergTerm(transform))
- .commit()
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot add partition field to non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"AddPartitionField ${catalog.name}.${ident.quoted} ${name.map(n => s"$n=").getOrElse("")}${transform.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala
deleted file mode 100644
index f66962a8c453..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CallExec.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.connector.iceberg.catalog.Procedure
-import scala.collection.compat.immutable.ArraySeq
-
-case class CallExec(
- output: Seq[Attribute],
- procedure: Procedure,
- input: InternalRow) extends LeafV2CommandExec {
-
- override protected def run(): Seq[InternalRow] = {
- ArraySeq.unsafeWrapArray(procedure.call(input))
- }
-
- override def simpleString(maxFields: Int): String = {
- s"CallExec${truncatedString(output, "[", ", ", "]", maxFields)} ${procedure.description}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala
deleted file mode 100644
index ecf1489e0854..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceBranchExec.scala
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.BranchOptions
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class CreateOrReplaceBranchExec(
- catalog: TableCatalog,
- ident: Identifier,
- branch: String,
- branchOptions: BranchOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean) extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val snapshotId: java.lang.Long = branchOptions.snapshotId
- .orElse(Option(iceberg.table.currentSnapshot()).map(_.snapshotId()))
- .map(java.lang.Long.valueOf)
- .orNull
-
- val manageSnapshots = iceberg.table().manageSnapshots()
- val refExists = null != iceberg.table().refs().get(branch)
-
- def safeCreateBranch(): Unit = {
- if (snapshotId == null) {
- manageSnapshots.createBranch(branch)
- } else {
- manageSnapshots.createBranch(branch, snapshotId)
- }
- }
-
- if (create && replace && !refExists) {
- safeCreateBranch()
- } else if (replace) {
- Preconditions.checkArgument(snapshotId != null,
- "Cannot complete replace branch operation on %s, main has no snapshot", ident)
- manageSnapshots.replaceBranch(branch, snapshotId)
- } else {
- if (refExists && ifNotExists) {
- return Nil
- }
-
- safeCreateBranch()
- }
-
- if (branchOptions.numSnapshots.nonEmpty) {
- manageSnapshots.setMinSnapshotsToKeep(branch, branchOptions.numSnapshots.get.toInt)
- }
-
- if (branchOptions.snapshotRetain.nonEmpty) {
- manageSnapshots.setMaxSnapshotAgeMs(branch, branchOptions.snapshotRetain.get)
- }
-
- if (branchOptions.snapshotRefRetain.nonEmpty) {
- manageSnapshots.setMaxRefAgeMs(branch, branchOptions.snapshotRefRetain.get)
- }
-
- manageSnapshots.commit()
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot create or replace branch on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"CreateOrReplace branch: $branch for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala
deleted file mode 100644
index 372cd7548632..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateOrReplaceTagExec.scala
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.TagOptions
-import org.apache.spark.sql.connector.catalog._
-
-case class CreateOrReplaceTagExec(
- catalog: TableCatalog,
- ident: Identifier,
- tag: String,
- tagOptions: TagOptions,
- create: Boolean,
- replace: Boolean,
- ifNotExists: Boolean) extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val snapshotId: java.lang.Long = tagOptions.snapshotId
- .orElse(Option(iceberg.table.currentSnapshot()).map(_.snapshotId()))
- .map(java.lang.Long.valueOf)
- .orNull
-
- Preconditions.checkArgument(snapshotId != null,
- "Cannot complete create or replace tag operation on %s, main has no snapshot", ident)
-
- val manageSnapshot = iceberg.table.manageSnapshots()
- val refExists = null != iceberg.table().refs().get(tag)
-
- if (create && replace && !refExists) {
- manageSnapshot.createTag(tag, snapshotId)
- } else if (replace) {
- manageSnapshot.replaceTag(tag, snapshotId)
- } else {
- if (refExists && ifNotExists) {
- return Nil
- }
-
- manageSnapshot.createTag(tag, snapshotId)
- }
-
- if (tagOptions.snapshotRefRetain.nonEmpty) {
- manageSnapshot.setMaxRefAgeMs(tag, tagOptions.snapshotRefRetain.get)
- }
-
- manageSnapshot.commit()
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot create tag to non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"Create tag: $tag for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala
deleted file mode 100644
index ff8f1820099a..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropBranchExec.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropBranchExec(
- catalog: TableCatalog,
- ident: Identifier,
- branch: String,
- ifExists: Boolean) extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val ref = iceberg.table().refs().get(branch)
- if (ref != null || !ifExists) {
- iceberg.table().manageSnapshots().removeBranch(branch).commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop branch on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropBranch branch: ${branch} for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala
deleted file mode 100644
index dee778b474f9..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIdentifierFieldsExec.scala
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions
-import org.apache.iceberg.relocated.com.google.common.collect.Sets
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropIdentifierFieldsExec(
- catalog: TableCatalog,
- ident: Identifier,
- fields: Seq[String]) extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- val identifierFieldNames = Sets.newHashSet(schema.identifierFieldNames)
-
- for (name <- fields) {
- Preconditions.checkArgument(schema.findField(name) != null,
- "Cannot complete drop identifier fields operation: field %s not found", name)
- Preconditions.checkArgument(identifierFieldNames.contains(name),
- "Cannot complete drop identifier fields operation: %s is not an identifier field", name)
- identifierFieldNames.remove(name)
- }
-
- iceberg.table.updateSchema()
- .setIdentifierFields(identifierFieldNames)
- .commit();
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop identifier fields in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropIdentifierFields ${catalog.name}.${ident.quoted} (${fields.quoted})";
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala
deleted file mode 100644
index 9a153f0c004e..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropPartitionFieldExec.scala
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class DropPartitionFieldExec(
- catalog: TableCatalog,
- ident: Identifier,
- transform: Transform) extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- transform match {
- case IdentityTransform(FieldReference(parts)) if parts.size == 1 && schema.findField(parts.head) == null =>
- // the name is not present in the Iceberg schema, so it must be a partition field name, not a column name
- iceberg.table.updateSpec()
- .removeField(parts.head)
- .commit()
-
- case _ =>
- iceberg.table.updateSpec()
- .removeField(Spark3Util.toIcebergTerm(transform))
- .commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop partition field in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropPartitionField ${catalog.name}.${ident.quoted} ${transform.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala
deleted file mode 100644
index 0a1c17c0b1b2..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTagExec.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class DropTagExec(
- catalog: TableCatalog,
- ident: Identifier,
- tag: String,
- ifExists: Boolean) extends LeafV2CommandExec {
-
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val ref = iceberg.table().refs().get(tag)
- if (ref != null || !ifExists) {
- iceberg.table().manageSnapshots().removeTag(tag).commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot drop tag on non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"DropTag tag: ${tag} for table: ${ident.quoted}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala
deleted file mode 100644
index 85bda0b08d46..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Implicits.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.write.RowLevelOperationTable
-
-/**
- * A class similar to DataSourceV2Implicits in Spark but contains custom implicit helpers.
- */
-object ExtendedDataSourceV2Implicits {
- implicit class TableHelper(table: Table) {
- def asRowLevelOperationTable: RowLevelOperationTable = {
- table match {
- case rowLevelOperationTable: RowLevelOperationTable =>
- rowLevelOperationTable
- case _ =>
- throw new AnalysisException(s"Table ${table.name} is not a row-level operation table")
- }
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
deleted file mode 100644
index 0a27d49287f2..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.SparkCatalog
-import org.apache.iceberg.spark.SparkSessionCatalog
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.Strategy
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.plans.logical.AddPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.Call
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceBranch
-import org.apache.spark.sql.catalyst.plans.logical.CreateOrReplaceTag
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.DropBranch
-import org.apache.spark.sql.catalyst.plans.logical.DropIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.DropPartitionField
-import org.apache.spark.sql.catalyst.plans.logical.DropTag
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeRows
-import org.apache.spark.sql.catalyst.plans.logical.NoStatsUnaryNode
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.ReplacePartitionField
-import org.apache.spark.sql.catalyst.plans.logical.SetIdentifierFields
-import org.apache.spark.sql.catalyst.plans.logical.SetWriteDistributionAndOrdering
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import scala.jdk.CollectionConverters._
-
-case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy with PredicateHelper {
-
- import DataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
- case c @ Call(procedure, args) =>
- val input = buildInternalRow(args)
- CallExec(c.output, procedure, input) :: Nil
-
- case AddPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform, name) =>
- AddPartitionFieldExec(catalog, ident, transform, name) :: Nil
-
- case DropPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform) =>
- DropPartitionFieldExec(catalog, ident, transform) :: Nil
-
- case ReplacePartitionField(IcebergCatalogAndIdentifier(catalog, ident), transformFrom, transformTo, name) =>
- ReplacePartitionFieldExec(catalog, ident, transformFrom, transformTo, name) :: Nil
-
- case CreateOrReplaceBranch(
- IcebergCatalogAndIdentifier(catalog, ident), branch, branchOptions, create, replace, ifNotExists) =>
- CreateOrReplaceBranchExec(catalog, ident, branch, branchOptions, create, replace, ifNotExists) :: Nil
-
- case CreateOrReplaceTag(
- IcebergCatalogAndIdentifier(catalog, ident), tag, tagOptions, create, replace, ifNotExists) =>
- CreateOrReplaceTagExec(catalog, ident, tag, tagOptions, create, replace, ifNotExists) :: Nil
-
- case DropBranch(IcebergCatalogAndIdentifier(catalog, ident), branch, ifExists) =>
- DropBranchExec(catalog, ident, branch, ifExists) :: Nil
-
- case DropTag(IcebergCatalogAndIdentifier(catalog, ident), tag, ifExists) =>
- DropTagExec(catalog, ident, tag, ifExists) :: Nil
-
- case SetIdentifierFields(IcebergCatalogAndIdentifier(catalog, ident), fields) =>
- SetIdentifierFieldsExec(catalog, ident, fields) :: Nil
-
- case DropIdentifierFields(IcebergCatalogAndIdentifier(catalog, ident), fields) =>
- DropIdentifierFieldsExec(catalog, ident, fields) :: Nil
-
- case SetWriteDistributionAndOrdering(
- IcebergCatalogAndIdentifier(catalog, ident), distributionMode, ordering) =>
- SetWriteDistributionAndOrderingExec(catalog, ident, distributionMode, ordering) :: Nil
-
- case ReplaceData(_: DataSourceV2Relation, query, r: DataSourceV2Relation, Some(write)) =>
- // refresh the cache using the original relation
- ReplaceDataExec(planLater(query), refreshCache(r), write) :: Nil
-
- case WriteDelta(_: DataSourceV2Relation, query, r: DataSourceV2Relation, projs, Some(write)) =>
- // refresh the cache using the original relation
- WriteDeltaExec(planLater(query), refreshCache(r), projs, write) :: Nil
-
- case MergeRows(isSourceRowPresent, isTargetRowPresent, matchedConditions, matchedOutputs, notMatchedConditions,
- notMatchedOutputs, targetOutput, rowIdAttrs, performCardinalityCheck, emitNotMatchedTargetRows,
- output, child) =>
-
- MergeRowsExec(isSourceRowPresent, isTargetRowPresent, matchedConditions, matchedOutputs, notMatchedConditions,
- notMatchedOutputs, targetOutput, rowIdAttrs, performCardinalityCheck, emitNotMatchedTargetRows,
- output, planLater(child)) :: Nil
-
- case DeleteFromIcebergTable(DataSourceV2ScanRelation(r, _, output), condition, None) =>
- // the optimizer has already checked that this delete can be handled using a metadata operation
- val deleteCond = condition.getOrElse(Literal.TrueLiteral)
- val predicates = splitConjunctivePredicates(deleteCond)
- val normalizedPredicates = DataSourceStrategy.normalizeExprs(predicates, output)
- val filters = normalizedPredicates.flatMap { pred =>
- val filter = DataSourceStrategy.translateFilter(pred, supportNestedPredicatePushdown = true)
- if (filter.isEmpty) {
- throw QueryCompilationErrors.cannotTranslateExpressionToSourceFilterError(pred)
- }
- filter
- }.toArray
- DeleteFromTableExec(r.table.asDeletable, filters, refreshCache(r)) :: Nil
-
- case NoStatsUnaryNode(child) =>
- planLater(child) :: Nil
-
- case _ => Nil
- }
-
- private def buildInternalRow(exprs: Seq[Expression]): InternalRow = {
- val values = new Array[Any](exprs.size)
- for (index <- exprs.indices) {
- values(index) = exprs(index).eval()
- }
- new GenericInternalRow(values)
- }
-
- private def refreshCache(r: DataSourceV2Relation)(): Unit = {
- spark.sharedState.cacheManager.recacheByPlan(spark, r)
- }
-
- private object IcebergCatalogAndIdentifier {
- def unapply(identifier: Seq[String]): Option[(TableCatalog, Identifier)] = {
- val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(spark, identifier.asJava)
- catalogAndIdentifier.catalog match {
- case icebergCatalog: SparkCatalog =>
- Some((icebergCatalog, catalogAndIdentifier.identifier))
- case icebergCatalog: SparkSessionCatalog[_] =>
- Some((icebergCatalog, catalogAndIdentifier.identifier))
- case _ =>
- None
- }
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDistributionAndOrderingUtils.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDistributionAndOrderingUtils.scala
deleted file mode 100644
index 8c37b1b75924..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDistributionAndOrderingUtils.scala
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.ExtendedV2ExpressionUtils.toCatalyst
-import org.apache.spark.sql.catalyst.expressions.SortOrder
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.RepartitionByExpression
-import org.apache.spark.sql.catalyst.plans.logical.Sort
-import org.apache.spark.sql.connector.distributions.ClusteredDistribution
-import org.apache.spark.sql.connector.distributions.OrderedDistribution
-import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution
-import org.apache.spark.sql.connector.write.RequiresDistributionAndOrdering
-import org.apache.spark.sql.connector.write.Write
-import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.internal.SQLConf
-import scala.collection.compat.immutable.ArraySeq
-
-/**
- * A rule that is inspired by DistributionAndOrderingUtils in Spark but supports Iceberg transforms.
- *
- * Note that similarly to the original rule in Spark, it does not let AQE pick the number of shuffle
- * partitions. See SPARK-34230 for context.
- */
-object ExtendedDistributionAndOrderingUtils {
-
- def prepareQuery(write: Write, query: LogicalPlan, conf: SQLConf): LogicalPlan = write match {
- case write: RequiresDistributionAndOrdering =>
- val numPartitions = write.requiredNumPartitions()
- val distribution = write.requiredDistribution match {
- case d: OrderedDistribution => d.ordering.map(e => toCatalyst(e, query))
- case d: ClusteredDistribution => d.clustering.map(e => toCatalyst(e, query))
- case _: UnspecifiedDistribution => Array.empty[Expression]
- }
-
- val queryWithDistribution = if (distribution.nonEmpty) {
- val finalNumPartitions = if (numPartitions > 0) {
- numPartitions
- } else {
- conf.numShufflePartitions
- }
- // the conversion to catalyst expressions above produces SortOrder expressions
- // for OrderedDistribution and generic expressions for ClusteredDistribution
- // this allows RepartitionByExpression to pick either range or hash partitioning
- RepartitionByExpression(ArraySeq.unsafeWrapArray(distribution), query, finalNumPartitions)
- } else if (numPartitions > 0) {
- throw QueryCompilationErrors.numberOfPartitionsNotAllowedWithUnspecifiedDistributionError()
- } else {
- query
- }
-
- val ordering = write.requiredOrdering.toSeq
- .map(e => toCatalyst(e, query))
- .asInstanceOf[Seq[SortOrder]]
-
- val queryWithDistributionAndOrdering = if (ordering.nonEmpty) {
- Sort(ordering, global = false, queryWithDistribution)
- } else {
- queryWithDistribution
- }
-
- queryWithDistributionAndOrdering
-
- case _ =>
- query
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala
deleted file mode 100644
index 86cab35cb36e..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedV2Writes.scala
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import java.util.UUID
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.plans.logical.AppendData
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.OverwriteByExpression
-import org.apache.spark.sql.catalyst.plans.logical.OverwritePartitionsDynamic
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.WriteDelta
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.utils.PlanUtils.isIcebergRelation
-import org.apache.spark.sql.connector.catalog.Table
-import org.apache.spark.sql.connector.iceberg.write.DeltaWriteBuilder
-import org.apache.spark.sql.connector.write.ExtendedLogicalWriteInfoImpl
-import org.apache.spark.sql.connector.write.SupportsDynamicOverwrite
-import org.apache.spark.sql.connector.write.SupportsOverwrite
-import org.apache.spark.sql.connector.write.SupportsTruncate
-import org.apache.spark.sql.connector.write.WriteBuilder
-import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import org.apache.spark.sql.sources.AlwaysTrue
-import org.apache.spark.sql.sources.Filter
-import org.apache.spark.sql.types.StructType
-
-/**
- * A rule that is inspired by V2Writes in Spark but supports Iceberg transforms.
- */
-object ExtendedV2Writes extends Rule[LogicalPlan] with PredicateHelper {
-
- import DataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- case a @ AppendData(r: DataSourceV2Relation, query, options, _, None) if isIcebergRelation(r) =>
- val writeBuilder = newWriteBuilder(r.table, query.schema, options)
- val write = writeBuilder.build()
- val newQuery = ExtendedDistributionAndOrderingUtils.prepareQuery(write, query, conf)
- a.copy(write = Some(write), query = newQuery)
-
- case o @ OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, options, _, None)
- if isIcebergRelation(r) =>
- // fail if any filter cannot be converted. correctness depends on removing all matching data.
- val filters = splitConjunctivePredicates(deleteExpr).flatMap { pred =>
- val filter = DataSourceStrategy.translateFilter(pred, supportNestedPredicatePushdown = true)
- if (filter.isEmpty) {
- throw QueryCompilationErrors.cannotTranslateExpressionToSourceFilterError(pred)
- }
- filter
- }.toArray
-
- val table = r.table
- val writeBuilder = newWriteBuilder(table, query.schema, options)
- val write = writeBuilder match {
- case builder: SupportsTruncate if isTruncate(filters) =>
- builder.truncate().build()
- case builder: SupportsOverwrite =>
- builder.overwrite(filters).build()
- case _ =>
- throw QueryExecutionErrors.overwriteTableByUnsupportedExpressionError(table)
- }
-
- val newQuery = ExtendedDistributionAndOrderingUtils.prepareQuery(write, query, conf)
- o.copy(write = Some(write), query = newQuery)
-
- case o @ OverwritePartitionsDynamic(r: DataSourceV2Relation, query, options, _, None)
- if isIcebergRelation(r) =>
- val table = r.table
- val writeBuilder = newWriteBuilder(table, query.schema, options)
- val write = writeBuilder match {
- case builder: SupportsDynamicOverwrite =>
- builder.overwriteDynamicPartitions().build()
- case _ =>
- throw QueryExecutionErrors.dynamicPartitionOverwriteUnsupportedByTableError(table)
- }
- val newQuery = ExtendedDistributionAndOrderingUtils.prepareQuery(write, query, conf)
- o.copy(write = Some(write), query = newQuery)
-
- case rd @ ReplaceData(r: DataSourceV2Relation, query, _, None) =>
- val rowSchema = StructType.fromAttributes(rd.dataInput)
- val writeBuilder = newWriteBuilder(r.table, rowSchema, Map.empty)
- val write = writeBuilder.build()
- val newQuery = ExtendedDistributionAndOrderingUtils.prepareQuery(write, query, conf)
- rd.copy(write = Some(write), query = Project(rd.dataInput, newQuery))
-
- case wd @ WriteDelta(r: DataSourceV2Relation, query, _, projections, None) =>
- val rowSchema = projections.rowProjection.map(_.schema).orNull
- val rowIdSchema = projections.rowIdProjection.schema
- val metadataSchema = projections.metadataProjection.map(_.schema).orNull
- val writeBuilder = newWriteBuilder(r.table, rowSchema, Map.empty, rowIdSchema, metadataSchema)
- writeBuilder match {
- case builder: DeltaWriteBuilder =>
- val deltaWrite = builder.build()
- val newQuery = ExtendedDistributionAndOrderingUtils.prepareQuery(deltaWrite, query, conf)
- wd.copy(write = Some(deltaWrite), query = newQuery)
- case other =>
- throw new AnalysisException(s"$other is not DeltaWriteBuilder")
- }
- }
-
- private def isTruncate(filters: Array[Filter]): Boolean = {
- filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue]
- }
-
- private def newWriteBuilder(
- table: Table,
- rowSchema: StructType,
- writeOptions: Map[String, String],
- rowIdSchema: StructType = null,
- metadataSchema: StructType = null): WriteBuilder = {
- val info = ExtendedLogicalWriteInfoImpl(
- queryId = UUID.randomUUID().toString,
- rowSchema,
- writeOptions.asOptions,
- rowIdSchema,
- metadataSchema)
- table.asWritable.newWriteBuilder(info)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
deleted file mode 100644
index 4fbf8a523a54..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.SparkException
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Ascending
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.BasePredicate
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.SortOrder
-import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
-import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate
-import org.apache.spark.sql.catalyst.util.truncatedString
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.UnaryExecNode
-
-case class MergeRowsExec(
- isSourceRowPresent: Expression,
- isTargetRowPresent: Expression,
- matchedConditions: Seq[Expression],
- matchedOutputs: Seq[Seq[Expression]],
- notMatchedConditions: Seq[Expression],
- notMatchedOutputs: Seq[Seq[Expression]],
- targetOutput: Seq[Expression],
- rowIdAttrs: Seq[Attribute],
- performCardinalityCheck: Boolean,
- emitNotMatchedTargetRows: Boolean,
- output: Seq[Attribute],
- child: SparkPlan) extends UnaryExecNode {
-
- override def requiredChildOrdering: Seq[Seq[SortOrder]] = {
- if (performCardinalityCheck) {
- // request a local sort by the row ID attrs to co-locate matches for the same target row
- Seq(rowIdAttrs.map(attr => SortOrder(attr, Ascending)))
- } else {
- Seq(Nil)
- }
- }
-
- @transient override lazy val producedAttributes: AttributeSet = {
- AttributeSet(output.filterNot(attr => inputSet.contains(attr)))
- }
-
- @transient override lazy val references: AttributeSet = child.outputSet
-
- override def simpleString(maxFields: Int): String = {
- s"MergeRowsExec${truncatedString(output, "[", ", ", "]", maxFields)}"
- }
-
- override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
- copy(child = newChild)
- }
-
- protected override def doExecute(): RDD[InternalRow] = {
- child.execute().mapPartitions(processPartition)
- }
-
- private def createProjection(exprs: Seq[Expression], attrs: Seq[Attribute]): UnsafeProjection = {
- UnsafeProjection.create(exprs, attrs)
- }
-
- private def createPredicate(expr: Expression, attrs: Seq[Attribute]): BasePredicate = {
- GeneratePredicate.generate(expr, attrs)
- }
-
- private def applyProjection(
- actions: Seq[(BasePredicate, Option[UnsafeProjection])],
- inputRow: InternalRow): InternalRow = {
-
- // find the first action where the predicate evaluates to true
- // if there are overlapping conditions in actions, use the first matching action
- // in the example below, when id = 5, both actions match but the first one is applied
- // WHEN MATCHED AND id > 1 AND id < 10 UPDATE *
- // WHEN MATCHED AND id = 5 OR id = 21 DELETE
-
- val pair = actions.find {
- case (predicate, _) => predicate.eval(inputRow)
- }
-
- // apply the projection to produce an output row, or return null to suppress this row
- pair match {
- case Some((_, Some(projection))) =>
- projection.apply(inputRow)
- case _ =>
- null
- }
- }
-
- private def processPartition(rowIterator: Iterator[InternalRow]): Iterator[InternalRow] = {
- val inputAttrs = child.output
-
- val isSourceRowPresentPred = createPredicate(isSourceRowPresent, inputAttrs)
- val isTargetRowPresentPred = createPredicate(isTargetRowPresent, inputAttrs)
-
- val matchedPreds = matchedConditions.map(createPredicate(_, inputAttrs))
- val matchedProjs = matchedOutputs.map {
- case output if output.nonEmpty => Some(createProjection(output, inputAttrs))
- case _ => None
- }
- val matchedPairs = matchedPreds zip matchedProjs
-
- val notMatchedPreds = notMatchedConditions.map(createPredicate(_, inputAttrs))
- val notMatchedProjs = notMatchedOutputs.map {
- case output if output.nonEmpty => Some(createProjection(output, inputAttrs))
- case _ => None
- }
- val nonMatchedPairs = notMatchedPreds zip notMatchedProjs
-
- val projectTargetCols = createProjection(targetOutput, inputAttrs)
- val rowIdProj = createProjection(rowIdAttrs, inputAttrs)
-
-    // This method is responsible for processing an input row to emit the resultant row with an
- // additional column that indicates whether the row is going to be included in the final
- // output of merge or not.
- // 1. Found a target row for which there is no corresponding source row (join condition not met)
- // - Only project the target columns if we need to output unchanged rows
- // 2. Found a source row for which there is no corresponding target row (join condition not met)
-    //    - Apply the not matched actions (i.e. INSERT actions) if the not-matched conditions are met.
- // 3. Found a source row for which there is a corresponding target row (join condition met)
-    //    - Apply the matched actions (i.e. DELETE or UPDATE actions) if the matched conditions are met.
- def processRow(inputRow: InternalRow): InternalRow = {
- if (emitNotMatchedTargetRows && !isSourceRowPresentPred.eval(inputRow)) {
- projectTargetCols.apply(inputRow)
- } else if (!isTargetRowPresentPred.eval(inputRow)) {
- applyProjection(nonMatchedPairs, inputRow)
- } else {
- applyProjection(matchedPairs, inputRow)
- }
- }
-
- var lastMatchedRowId: InternalRow = null
-
- def processRowWithCardinalityCheck(inputRow: InternalRow): InternalRow = {
- val isSourceRowPresent = isSourceRowPresentPred.eval(inputRow)
- val isTargetRowPresent = isTargetRowPresentPred.eval(inputRow)
-
- if (isSourceRowPresent && isTargetRowPresent) {
- val currentRowId = rowIdProj.apply(inputRow)
- if (currentRowId == lastMatchedRowId) {
- throw new SparkException(
- "The ON search condition of the MERGE statement matched a single row from " +
- "the target table with multiple rows of the source table. This could result " +
- "in the target row being operated on more than once with an update or delete " +
- "operation and is not allowed.")
- }
- lastMatchedRowId = currentRowId.copy()
- } else {
- lastMatchedRowId = null
- }
-
- if (emitNotMatchedTargetRows && !isSourceRowPresent) {
- projectTargetCols.apply(inputRow)
- } else if (!isTargetRowPresent) {
- applyProjection(nonMatchedPairs, inputRow)
- } else {
- applyProjection(matchedPairs, inputRow)
- }
- }
-
- val processFunc: InternalRow => InternalRow = if (performCardinalityCheck) {
- processRowWithCardinalityCheck
- } else {
- processRow
- }
-
- rowIterator
- .map(processFunc)
- .filter(row => row != null)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala
deleted file mode 100644
index 54e66db91b33..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.connector.catalog.SupportsDelete
-import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import org.apache.spark.sql.sources
-import org.slf4j.LoggerFactory
-
-/**
- * Checks whether a metadata delete is possible and nullifies the rewrite plan if the source can
- * handle this delete without executing the rewrite plan.
- *
- * Note this rule must be run after expression optimization.
- */
-object OptimizeMetadataOnlyDeleteFromTable extends Rule[LogicalPlan] with PredicateHelper {
-
- val logger = LoggerFactory.getLogger(OptimizeMetadataOnlyDeleteFromTable.getClass)
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
- case d @ DeleteFromIcebergTable(relation: DataSourceV2Relation, cond, Some(_)) =>
- val deleteCond = cond.getOrElse(Literal.TrueLiteral)
- relation.table match {
- case table: SupportsDelete if !SubqueryExpression.hasSubquery(deleteCond) =>
- val predicates = splitConjunctivePredicates(deleteCond)
- val normalizedPredicates = DataSourceStrategy.normalizeExprs(predicates, relation.output)
- val dataSourceFilters = toDataSourceFilters(normalizedPredicates)
- val allPredicatesTranslated = normalizedPredicates.size == dataSourceFilters.length
- if (allPredicatesTranslated && table.canDeleteWhere(dataSourceFilters)) {
- logger.info(s"Optimizing delete expression: ${dataSourceFilters.mkString(",")} as metadata delete")
- d.copy(rewritePlan = None)
- } else {
- d
- }
- case _ =>
- d
- }
- }
-
- protected def toDataSourceFilters(predicates: Seq[Expression]): Array[sources.Filter] = {
- predicates.flatMap { p =>
- val filter = DataSourceStrategy.translateFilter(p, supportNestedPredicatePushdown = true)
- if (filter.isEmpty) {
- logWarning(s"Cannot translate expression to source filter: $p")
- }
- filter
- }.toArray
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala
deleted file mode 100644
index 26c652469ac4..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceDataExec.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.connector.write.Write
-import org.apache.spark.sql.execution.SparkPlan
-
-/**
- * Physical plan node to replace data in existing tables.
- */
-case class ReplaceDataExec(
- query: SparkPlan,
- refreshCache: () => Unit,
- write: Write) extends V2ExistingTableWriteExec {
-
- override lazy val references: AttributeSet = query.outputSet
- override lazy val stringArgs: Iterator[Any] = Iterator(query, write)
-
- override protected def withNewChildInternal(newChild: SparkPlan): ReplaceDataExec = {
- copy(query = newChild)
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala
deleted file mode 100644
index fcae0a5defc4..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplacePartitionFieldExec.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.Spark3Util
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import org.apache.spark.sql.connector.expressions.FieldReference
-import org.apache.spark.sql.connector.expressions.IdentityTransform
-import org.apache.spark.sql.connector.expressions.Transform
-
-case class ReplacePartitionFieldExec(
- catalog: TableCatalog,
- ident: Identifier,
- transformFrom: Transform,
- transformTo: Transform,
- name: Option[String]) extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val schema = iceberg.table.schema
- transformFrom match {
- case IdentityTransform(FieldReference(parts)) if parts.size == 1 && schema.findField(parts.head) == null =>
- // the name is not present in the Iceberg schema, so it must be a partition field name, not a column name
- iceberg.table.updateSpec()
- .removeField(parts.head)
- .addField(name.orNull, Spark3Util.toIcebergTerm(transformTo))
- .commit()
-
- case _ =>
- iceberg.table.updateSpec()
- .removeField(Spark3Util.toIcebergTerm(transformFrom))
- .addField(name.orNull, Spark3Util.toIcebergTerm(transformTo))
- .commit()
- }
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot replace partition field in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"ReplacePartitionField ${catalog.name}.${ident.quoted} ${transformFrom.describe} " +
- s"with ${name.map(n => s"$n=").getOrElse("")}${transformTo.describe}"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala
deleted file mode 100644
index 414d4c0ec305..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceRewrittenRowLevelCommand.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * Replaces operations such as DELETE and MERGE with the corresponding rewrite plans.
- */
-object ReplaceRewrittenRowLevelCommand extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- case c: RowLevelCommand if c.rewritePlan.isDefined =>
- c.rewritePlan.get
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala
deleted file mode 100644
index 4e89b9a1c243..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RowLevelCommandScanRelationPushDown.scala
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.planning.RewrittenRowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.CharVarcharUtils
-import org.apache.spark.sql.connector.read.ScanBuilder
-import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import org.apache.spark.sql.sources.Filter
-import org.apache.spark.sql.types.StructType
-
-object RowLevelCommandScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
- import ExtendedDataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- // push down the filter from the command condition instead of the filter in the rewrite plan,
- // which may be negated for copy-on-write operations
- case RewrittenRowLevelCommand(command, relation: DataSourceV2Relation, rewritePlan) =>
- val table = relation.table.asRowLevelOperationTable
- val scanBuilder = table.newScanBuilder(relation.options)
-
- val (pushedFilters, remainingFilters) = command.condition match {
- case Some(cond) => pushFilters(cond, scanBuilder, relation.output)
- case None => (Nil, Nil)
- }
-
- val (scan, output) = PushDownUtils.pruneColumns(scanBuilder, relation, relation.output, Nil)
-
- logInfo(
- s"""
- |Pushing operators to ${relation.name}
- |Pushed filters: ${pushedFilters.mkString(", ")}
- |Filters that were not pushed: ${remainingFilters.mkString(",")}
- |Output: ${output.mkString(", ")}
- """.stripMargin)
-
- // replace DataSourceV2Relation with DataSourceV2ScanRelation for the row operation table
- // there may be multiple read relations for UPDATEs that rely on the UNION approach
- val newRewritePlan = rewritePlan transform {
- case r: DataSourceV2Relation if r.table eq table =>
- DataSourceV2ScanRelation(r, scan, toOutputAttrs(scan.readSchema(), r))
- }
-
- command.withNewRewritePlan(newRewritePlan)
- }
-
- private def pushFilters(
- cond: Expression,
- scanBuilder: ScanBuilder,
- tableAttrs: Seq[AttributeReference]): (Seq[Filter], Seq[Expression]) = {
-
- val tableAttrSet = AttributeSet(tableAttrs)
- val filters = splitConjunctivePredicates(cond).filter(_.references.subsetOf(tableAttrSet))
- val normalizedFilters = DataSourceStrategy.normalizeExprs(filters, tableAttrs)
- val (_, normalizedFiltersWithoutSubquery) =
- normalizedFilters.partition(SubqueryExpression.hasSubquery)
-
- PushDownUtils.pushFilters(scanBuilder, normalizedFiltersWithoutSubquery)
- }
-
- private def toOutputAttrs(
- schema: StructType,
- relation: DataSourceV2Relation): Seq[AttributeReference] = {
- val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap
- val cleaned = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)
- cleaned.toAttributes.map {
- // keep the attribute id during transformation
- a => a.withExprId(nameToAttr(a.name).exprId)
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala
deleted file mode 100644
index b50550ad38ef..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetIdentifierFieldsExec.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-import scala.jdk.CollectionConverters._
-
-case class SetIdentifierFieldsExec(
- catalog: TableCatalog,
- ident: Identifier,
- fields: Seq[String]) extends LeafV2CommandExec {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- iceberg.table.updateSchema()
- .setIdentifierFields(fields.asJava)
- .commit();
- case table =>
- throw new UnsupportedOperationException(s"Cannot set identifier fields in non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- s"SetIdentifierFields ${catalog.name}.${ident.quoted} (${fields.quoted})";
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala
deleted file mode 100644
index 386485b10b05..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetWriteDistributionAndOrderingExec.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.iceberg.DistributionMode
-import org.apache.iceberg.NullOrder
-import org.apache.iceberg.SortDirection
-import org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE
-import org.apache.iceberg.expressions.Term
-import org.apache.iceberg.spark.source.SparkTable
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.CatalogV2Implicits
-import org.apache.spark.sql.connector.catalog.Identifier
-import org.apache.spark.sql.connector.catalog.TableCatalog
-
-case class SetWriteDistributionAndOrderingExec(
- catalog: TableCatalog,
- ident: Identifier,
- distributionMode: DistributionMode,
- sortOrder: Seq[(Term, SortDirection, NullOrder)]) extends LeafV2CommandExec {
-
- import CatalogV2Implicits._
-
- override lazy val output: Seq[Attribute] = Nil
-
- override protected def run(): Seq[InternalRow] = {
- catalog.loadTable(ident) match {
- case iceberg: SparkTable =>
- val txn = iceberg.table.newTransaction()
-
- val orderBuilder = txn.replaceSortOrder()
- sortOrder.foreach {
- case (term, SortDirection.ASC, nullOrder) =>
- orderBuilder.asc(term, nullOrder)
- case (term, SortDirection.DESC, nullOrder) =>
- orderBuilder.desc(term, nullOrder)
- }
- orderBuilder.commit()
-
- txn.updateProperties()
- .set(WRITE_DISTRIBUTION_MODE, distributionMode.modeName())
- .commit()
-
- txn.commitTransaction()
-
- case table =>
- throw new UnsupportedOperationException(s"Cannot set write order of non-Iceberg table: $table")
- }
-
- Nil
- }
-
- override def simpleString(maxFields: Int): String = {
- val tableIdent = s"${catalog.name}.${ident.quoted}"
- val order = sortOrder.map {
- case (term, direction, nullOrder) => s"$term $direction $nullOrder"
- }.mkString(", ")
- s"SetWriteDistributionAndOrdering $tableIdent $distributionMode $order"
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteDeltaExec.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteDeltaExec.scala
deleted file mode 100644
index fa4e4f648313..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteDeltaExec.scala
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.v2
-
-import org.apache.spark.SparkEnv
-import org.apache.spark.SparkException
-import org.apache.spark.TaskContext
-import org.apache.spark.internal.Logging
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.AttributeSet
-import org.apache.spark.sql.catalyst.util.RowDeltaUtils._
-import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.iceberg.write.DeltaWrite
-import org.apache.spark.sql.connector.iceberg.write.DeltaWriter
-import org.apache.spark.sql.connector.write.BatchWrite
-import org.apache.spark.sql.connector.write.DataWriter
-import org.apache.spark.sql.connector.write.DataWriterFactory
-import org.apache.spark.sql.connector.write.PhysicalWriteInfoImpl
-import org.apache.spark.sql.connector.write.WriterCommitMessage
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.metric.CustomMetrics
-import org.apache.spark.sql.execution.metric.SQLMetric
-import org.apache.spark.util.LongAccumulator
-import org.apache.spark.util.Utils
-import scala.collection.compat.immutable.ArraySeq
-import scala.util.control.NonFatal
-
-/**
- * Physical plan node to write a delta of rows to an existing table.
- */
-case class WriteDeltaExec(
- query: SparkPlan,
- refreshCache: () => Unit,
- projections: WriteDeltaProjections,
- write: DeltaWrite) extends ExtendedV2ExistingTableWriteExec[DeltaWriter[InternalRow]] {
-
- override lazy val references: AttributeSet = query.outputSet
- override lazy val stringArgs: Iterator[Any] = Iterator(query, write)
-
- override lazy val writingTask: WritingSparkTask[DeltaWriter[InternalRow]] = {
- DeltaWithMetadataWritingSparkTask(projections)
- }
-
- override protected def withNewChildInternal(newChild: SparkPlan): WriteDeltaExec = {
- copy(query = newChild)
- }
-}
-
-// a trait similar to V2ExistingTableWriteExec but supports custom write tasks
-trait ExtendedV2ExistingTableWriteExec[W <: DataWriter[InternalRow]] extends V2ExistingTableWriteExec {
- def writingTask: WritingSparkTask[W]
-
- protected override def writeWithV2(batchWrite: BatchWrite): Seq[InternalRow] = {
- val rdd: RDD[InternalRow] = {
- val tempRdd = query.execute()
- // SPARK-23271 If we are attempting to write a zero partition rdd, create a dummy single
- // partition rdd to make sure we at least set up one write task to write the metadata.
- if (tempRdd.partitions.length == 0) {
- sparkContext.parallelize(Seq.empty[InternalRow], 1)
- } else {
- tempRdd
- }
- }
- // introduce a local var to avoid serializing the whole class
- val task = writingTask
- val writerFactory = batchWrite.createBatchWriterFactory(
- PhysicalWriteInfoImpl(rdd.getNumPartitions))
- val useCommitCoordinator = batchWrite.useCommitCoordinator
- val messages = new Array[WriterCommitMessage](rdd.partitions.length)
- val totalNumRowsAccumulator = new LongAccumulator()
-
- logInfo(s"Start processing data source write support: $batchWrite. " +
- s"The input RDD has ${messages.length} partitions.")
-
- // Avoid object not serializable issue.
- val writeMetrics: Map[String, SQLMetric] = customMetrics
-
- try {
- sparkContext.runJob(
- rdd,
- (context: TaskContext, iter: Iterator[InternalRow]) =>
- task.run(writerFactory, context, iter, useCommitCoordinator, writeMetrics),
- rdd.partitions.indices,
- (index, result: DataWritingSparkTaskResult) => {
- val commitMessage = result.writerCommitMessage
- messages(index) = commitMessage
- totalNumRowsAccumulator.add(result.numRows)
- batchWrite.onDataWriterCommit(commitMessage)
- }
- )
-
- logInfo(s"Data source write support $batchWrite is committing.")
- batchWrite.commit(messages)
- logInfo(s"Data source write support $batchWrite committed.")
- commitProgress = Some(StreamWriterCommitProgress(totalNumRowsAccumulator.value))
- } catch {
- case cause: Throwable =>
- logError(s"Data source write support $batchWrite is aborting.")
- try {
- batchWrite.abort(messages)
- } catch {
- case t: Throwable =>
- logError(s"Data source write support $batchWrite failed to abort.")
- cause.addSuppressed(t)
- throw QueryExecutionErrors.writingJobFailedError(cause)
- }
- logError(s"Data source write support $batchWrite aborted.")
- cause match {
- // Only wrap non fatal exceptions.
- case NonFatal(e) => throw QueryExecutionErrors.writingJobAbortedError(e)
- case _ => throw cause
- }
- }
-
- Nil
- }
-}
-
-trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serializable {
-
- protected def writeFunc(writer: W, row: InternalRow): Unit
-
- def run(
- writerFactory: DataWriterFactory,
- context: TaskContext,
- iter: Iterator[InternalRow],
- useCommitCoordinator: Boolean,
- customMetrics: Map[String, SQLMetric]): DataWritingSparkTaskResult = {
- val stageId = context.stageId()
- val stageAttempt = context.stageAttemptNumber()
- val partId = context.partitionId()
- val taskId = context.taskAttemptId()
- val attemptId = context.attemptNumber()
- val dataWriter = writerFactory.createWriter(partId, taskId).asInstanceOf[W]
-
- var count = 0L
- // write the data and commit this writer.
- Utils.tryWithSafeFinallyAndFailureCallbacks(block = {
- while (iter.hasNext) {
- if (count % CustomMetrics.NUM_ROWS_PER_UPDATE == 0) {
- CustomMetrics.updateMetrics(ArraySeq.unsafeWrapArray(dataWriter.currentMetricsValues), customMetrics)
- }
-
-      // count the row before writing it
- count += 1
- writeFunc(dataWriter, iter.next())
- }
-
- CustomMetrics.updateMetrics(ArraySeq.unsafeWrapArray(dataWriter.currentMetricsValues), customMetrics)
-
- val msg = if (useCommitCoordinator) {
- val coordinator = SparkEnv.get.outputCommitCoordinator
- val commitAuthorized = coordinator.canCommit(stageId, stageAttempt, partId, attemptId)
- if (commitAuthorized) {
- logInfo(s"Commit authorized for partition $partId (task $taskId, attempt $attemptId, " +
- s"stage $stageId.$stageAttempt)")
- dataWriter.commit()
- } else {
- val commitDeniedException = QueryExecutionErrors.commitDeniedError(
- partId, taskId, attemptId, stageId, stageAttempt)
- logInfo(commitDeniedException.getMessage)
- // throwing CommitDeniedException will trigger the catch block for abort
- throw commitDeniedException
- }
-
- } else {
- logInfo(s"Writer for partition ${context.partitionId()} is committing.")
- dataWriter.commit()
- }
-
- logInfo(s"Committed partition $partId (task $taskId, attempt $attemptId, " +
- s"stage $stageId.$stageAttempt)")
-
- DataWritingSparkTaskResult(count, msg)
-
- })(catchBlock = {
- // If there is an error, abort this writer
- logError(s"Aborting commit for partition $partId (task $taskId, attempt $attemptId, " +
- s"stage $stageId.$stageAttempt)")
- dataWriter.abort()
- logError(s"Aborted commit for partition $partId (task $taskId, attempt $attemptId, " +
- s"stage $stageId.$stageAttempt)")
- }, finallyBlock = {
- dataWriter.close()
- })
- }
-}
-
-case class DeltaWithMetadataWritingSparkTask(
- projs: WriteDeltaProjections) extends WritingSparkTask[DeltaWriter[InternalRow]] {
-
- private lazy val rowProjection = projs.rowProjection.orNull
- private lazy val rowIdProjection = projs.rowIdProjection
- private lazy val metadataProjection = projs.metadataProjection.orNull
-
- override protected def writeFunc(writer: DeltaWriter[InternalRow], row: InternalRow): Unit = {
- val operation = row.getInt(0)
-
- operation match {
- case DELETE_OPERATION =>
- rowIdProjection.project(row)
- metadataProjection.project(row)
- writer.delete(metadataProjection, rowIdProjection)
-
- case UPDATE_OPERATION =>
- rowProjection.project(row)
- rowIdProjection.project(row)
- metadataProjection.project(row)
- writer.update(metadataProjection, rowIdProjection, rowProjection)
-
- case INSERT_OPERATION =>
- rowProjection.project(row)
- writer.insert(rowProjection)
-
- case other =>
- throw new SparkException(s"Unexpected operation ID: $other")
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala b/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala
deleted file mode 100644
index 4b257920b66f..000000000000
--- a/spark/v3.2/spark-extensions/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelCommandDynamicPruning.scala
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.spark.sql.execution.dynamicpruning
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.And
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.expressions.AttributeMap
-import org.apache.spark.sql.catalyst.expressions.AttributeReference
-import org.apache.spark.sql.catalyst.expressions.DynamicPruningSubquery
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.ExtendedV2ExpressionUtils
-import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.planning.RewrittenRowLevelCommand
-import org.apache.spark.sql.catalyst.plans.LeftSemi
-import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Filter
-import org.apache.spark.sql.catalyst.plans.logical.Join
-import org.apache.spark.sql.catalyst.plans.logical.JoinHint
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable
-import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.catalyst.plans.logical.ReplaceData
-import org.apache.spark.sql.catalyst.plans.logical.RowLevelCommand
-import org.apache.spark.sql.catalyst.plans.logical.Sort
-import org.apache.spark.sql.catalyst.plans.logical.Subquery
-import org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
-import org.apache.spark.sql.catalyst.trees.TreePattern.SORT
-import org.apache.spark.sql.connector.read.SupportsRuntimeFiltering
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
-import org.apache.spark.sql.execution.datasources.v2.ExtendedDataSourceV2Implicits
-import scala.collection.compat.immutable.ArraySeq
-
-/**
- * A rule that adds a runtime filter for row-level commands.
- *
- * Note that only group-based rewrite plans (i.e. ReplaceData) are taken into account.
- * Row-based rewrite plans are subject to usual runtime filtering.
- */
-case class RowLevelCommandDynamicPruning(spark: SparkSession) extends Rule[LogicalPlan] with PredicateHelper {
-
- import ExtendedDataSourceV2Implicits._
-
- override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
- // apply special dynamic filtering only for plans that don't support deltas
- case RewrittenRowLevelCommand(
- command: RowLevelCommand,
- DataSourceV2ScanRelation(_, scan: SupportsRuntimeFiltering, _),
- rewritePlan: ReplaceData) if conf.dynamicPartitionPruningEnabled && isCandidate(command) =>
-
- // use reference equality to find exactly the required scan relations
- val newRewritePlan = rewritePlan transformUp {
- case r: DataSourceV2ScanRelation if r.scan eq scan =>
- // use the original table instance that was loaded for this row-level operation
- // in order to leverage a regular batch scan in the group filter query
- val originalTable = r.relation.table.asRowLevelOperationTable.table
- val relation = r.relation.copy(table = originalTable)
- val matchingRowsPlan = buildMatchingRowsPlan(relation, command)
-
- val filterAttrs = ArraySeq.unsafeWrapArray(scan.filterAttributes)
- val buildKeys = ExtendedV2ExpressionUtils.resolveRefs[Attribute](filterAttrs, matchingRowsPlan)
- val pruningKeys = ExtendedV2ExpressionUtils.resolveRefs[Attribute](filterAttrs, r)
- val dynamicPruningCond = buildDynamicPruningCond(matchingRowsPlan, buildKeys, pruningKeys)
-
- Filter(dynamicPruningCond, r)
- }
-
- // always optimize dynamic filtering subqueries for row-level commands as it is important
- // to rewrite introduced predicates as joins because Spark recently stopped optimizing
- // dynamic subqueries to facilitate broadcast reuse
- command.withNewRewritePlan(optimizeSubquery(newRewritePlan))
-
- }
-
- private def isCandidate(command: RowLevelCommand): Boolean = command.condition match {
- case Some(cond) if cond != Literal.TrueLiteral => true
- case _ => false
- }
-
- private def buildMatchingRowsPlan(
- relation: DataSourceV2Relation,
- command: RowLevelCommand): LogicalPlan = {
-
- // construct a filtering plan with the original scan relation
- val matchingRowsPlan = command match {
- case d: DeleteFromIcebergTable =>
- Filter(d.condition.get, relation)
-
- case u: UpdateIcebergTable =>
- // UPDATEs with subqueries are rewritten using a UNION with two identical scan relations
-        // the analyzer clones one of them and assigns fresh expr IDs so that attributes don't collide
- // this rule assigns dynamic filters to both scan relations based on the update condition
- // the condition always refers to the original expr IDs and must be transformed
- // see RewriteUpdateTable for more details
- val attrMap = buildAttrMap(u.table.output, relation.output)
- val transformedCond = u.condition.get transform {
- case attr: AttributeReference if attrMap.contains(attr) => attrMap(attr)
- }
- Filter(transformedCond, relation)
-
- case m: MergeIntoIcebergTable =>
- Join(relation, m.sourceTable, LeftSemi, Some(m.mergeCondition), JoinHint.NONE)
- }
-
- // clone the original relation in the filtering plan and assign new expr IDs to avoid conflicts
- matchingRowsPlan transformUpWithNewOutput {
- case r: DataSourceV2Relation if r eq relation =>
- val oldOutput = r.output
- val newOutput = oldOutput.map(_.newInstance())
- r.copy(output = newOutput) -> oldOutput.zip(newOutput)
- }
- }
-
- private def buildDynamicPruningCond(
- matchingRowsPlan: LogicalPlan,
- buildKeys: Seq[Attribute],
- pruningKeys: Seq[Attribute]): Expression = {
-
- val buildQuery = Project(buildKeys, matchingRowsPlan)
- val dynamicPruningSubqueries = pruningKeys.zipWithIndex.map { case (key, index) =>
- DynamicPruningSubquery(key, buildQuery, buildKeys, index, onlyInBroadcast = false)
- }
- dynamicPruningSubqueries.reduce(And)
- }
-
- private def buildAttrMap(
- tableAttrs: Seq[Attribute],
- scanAttrs: Seq[Attribute]): AttributeMap[Attribute] = {
-
- val resolver = conf.resolver
- val attrMapping = tableAttrs.flatMap { tableAttr =>
- scanAttrs
- .find(scanAttr => resolver(scanAttr.name, tableAttr.name))
- .map(scanAttr => tableAttr -> scanAttr)
- }
- AttributeMap(attrMapping)
- }
-
- // borrowed from OptimizeSubqueries in Spark
- private def optimizeSubquery(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
- _.containsPattern(PLAN_EXPRESSION)) {
- case s: SubqueryExpression =>
- val Subquery(newPlan, _) = spark.sessionState.optimizer.execute(Subquery.fromExpression(s))
- // At this point we have an optimized subquery plan that we are going to attach
- // to this subquery expression. Here we can safely remove any top level sort
- // in the plan as tuples produced by a subquery are un-ordered.
- s.withNewPlan(removeTopLevelSort(newPlan))
- }
-
- // borrowed from OptimizeSubqueries in Spark
- private def removeTopLevelSort(plan: LogicalPlan): LogicalPlan = {
- if (!plan.containsPattern(SORT)) {
- return plan
- }
- plan match {
- case Sort(_, _, child) => child
- case Project(fields, child) => Project(fields, removeTopLevelSort(child))
- case other => other
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java
deleted file mode 100644
index 8918dfec6584..000000000000
--- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import java.util.Objects;
-
-public class Employee {
- private Integer id;
- private String dep;
-
- public Employee() {}
-
- public Employee(Integer id, String dep) {
- this.id = id;
- this.dep = dep;
- }
-
- public Integer getId() {
- return id;
- }
-
- public void setId(Integer id) {
- this.id = id;
- }
-
- public String getDep() {
- return dep;
- }
-
- public void setDep(String dep) {
- this.dep = dep;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- } else if (other == null || getClass() != other.getClass()) {
- return false;
- }
-
- Employee employee = (Employee) other;
- return Objects.equals(id, employee.id) && Objects.equals(dep, employee.dep);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(id, dep);
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkExtensionsTestBase.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkExtensionsTestBase.java
deleted file mode 100644
index fb9cc09567fa..000000000000
--- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkExtensionsTestBase.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTOREURIS;
-
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.ThreadLocalRandom;
-import org.apache.iceberg.CatalogUtil;
-import org.apache.iceberg.hive.HiveCatalog;
-import org.apache.iceberg.hive.TestHiveMetastore;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.spark.SparkCatalogTestBase;
-import org.apache.iceberg.spark.SparkTestBase;
-import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.internal.SQLConf;
-import org.junit.BeforeClass;
-
-public abstract class SparkExtensionsTestBase extends SparkCatalogTestBase {
-
- private static final Random RANDOM = ThreadLocalRandom.current();
-
- public SparkExtensionsTestBase(
-      String catalogName, String implementation, Map<String, String> config) {
- super(catalogName, implementation, config);
- }
-
- @BeforeClass
- public static void startMetastoreAndSpark() {
- SparkTestBase.metastore = new TestHiveMetastore();
- metastore.start();
- SparkTestBase.hiveConf = metastore.hiveConf();
-
- SparkTestBase.spark =
- SparkSession.builder()
- .master("local[2]")
- .config("spark.testing", "true")
- .config(SQLConf.PARTITION_OVERWRITE_MODE().key(), "dynamic")
- .config("spark.sql.extensions", IcebergSparkSessionExtensions.class.getName())
- .config("spark.hadoop." + METASTOREURIS.varname, hiveConf.get(METASTOREURIS.varname))
- .config("spark.sql.shuffle.partitions", "4")
- .config("spark.sql.hive.metastorePartitionPruningFallbackOnException", "true")
- .config(
- SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), String.valueOf(RANDOM.nextBoolean()))
- .enableHiveSupport()
- .getOrCreate();
-
- SparkTestBase.catalog =
- (HiveCatalog)
- CatalogUtil.loadCatalog(
- HiveCatalog.class.getName(), "hive", ImmutableMap.of(), hiveConf);
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java
deleted file mode 100644
index e53c79f6c971..000000000000
--- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import static org.apache.iceberg.DataOperations.DELETE;
-import static org.apache.iceberg.DataOperations.OVERWRITE;
-import static org.apache.iceberg.SnapshotSummary.ADDED_DELETE_FILES_PROP;
-import static org.apache.iceberg.SnapshotSummary.ADDED_FILES_PROP;
-import static org.apache.iceberg.SnapshotSummary.CHANGED_PARTITION_COUNT_PROP;
-import static org.apache.iceberg.SnapshotSummary.DELETED_FILES_PROP;
-import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT;
-import static org.apache.iceberg.TableProperties.PARQUET_VECTORIZATION_ENABLED;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_HASH;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_NONE;
-import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_RANGE;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.stream.Collectors;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.Files;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.data.GenericRecord;
-import org.apache.iceberg.data.parquet.GenericParquetWriter;
-import org.apache.iceberg.io.DataWriter;
-import org.apache.iceberg.io.OutputFile;
-import org.apache.iceberg.parquet.Parquet;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.spark.SparkCatalog;
-import org.apache.iceberg.spark.SparkSessionCatalog;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoder;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
-import org.junit.Assert;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(Parameterized.class)
-public abstract class SparkRowLevelOperationsTestBase extends SparkExtensionsTestBase {
-
- private static final Random RANDOM = ThreadLocalRandom.current();
-
- protected final String fileFormat;
- protected final boolean vectorized;
- protected final String distributionMode;
-
- public SparkRowLevelOperationsTestBase(
- String catalogName,
- String implementation,
- Map<String, String> config,
- String fileFormat,
- boolean vectorized,
- String distributionMode) {
- super(catalogName, implementation, config);
- this.fileFormat = fileFormat;
- this.vectorized = vectorized;
- this.distributionMode = distributionMode;
- }
-
- @Parameters(
- name =
- "catalogName = {0}, implementation = {1}, config = {2},"
- + " format = {3}, vectorized = {4}, distributionMode = {5}")
- public static Object[][] parameters() {
- return new Object[][] {
- {
- "testhive",
- SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"),
- "orc",
- true,
- WRITE_DISTRIBUTION_MODE_NONE
- },
- {
- "testhive",
- SparkCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default"),
- "parquet",
- true,
- WRITE_DISTRIBUTION_MODE_NONE
- },
- {
- "testhadoop",
- SparkCatalog.class.getName(),
- ImmutableMap.of("type", "hadoop"),
- "parquet",
- RANDOM.nextBoolean(),
- WRITE_DISTRIBUTION_MODE_HASH
- },
- {
- "spark_catalog",
- SparkSessionCatalog.class.getName(),
- ImmutableMap.of(
- "type", "hive",
- "default-namespace", "default",
- "clients", "1",
- "parquet-enabled", "false",
- "cache-enabled",
- "false" // Spark will delete tables using v1, leaving the cache out of sync
- ),
- "avro",
- false,
- WRITE_DISTRIBUTION_MODE_RANGE
- }
- };
- }
-
- protected abstract Map<String, String> extraTableProperties();
-
- protected void initTable() {
- sql("ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')", tableName, DEFAULT_FILE_FORMAT, fileFormat);
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')",
- tableName, WRITE_DISTRIBUTION_MODE, distributionMode);
-
- switch (fileFormat) {
- case "parquet":
- sql(
- "ALTER TABLE %s SET TBLPROPERTIES('%s' '%b')",
- tableName, PARQUET_VECTORIZATION_ENABLED, vectorized);
- break;
- case "orc":
- Assert.assertTrue(vectorized);
- break;
- case "avro":
- Assert.assertFalse(vectorized);
- break;
- }
-
- Map<String, String> props = extraTableProperties();
- props.forEach(
- (prop, value) -> {
- sql("ALTER TABLE %s SET TBLPROPERTIES('%s' '%s')", tableName, prop, value);
- });
- }
-
- protected void createAndInitTable(String schema) {
- createAndInitTable(schema, null);
- }
-
- protected void createAndInitTable(String schema, String jsonData) {
- sql("CREATE TABLE %s (%s) USING iceberg", tableName, schema);
- initTable();
-
- if (jsonData != null) {
- try {
- Dataset<Row> ds = toDS(schema, jsonData);
- ds.writeTo(tableName).append();
- } catch (NoSuchTableException e) {
- throw new RuntimeException("Failed to write data", e);
- }
- }
- }
-
- protected void append(String table, String jsonData) {
- append(table, null, jsonData);
- }
-
- protected void append(String table, String schema, String jsonData) {
- try {
- Dataset<Row> ds = toDS(schema, jsonData);
- ds.coalesce(1).writeTo(table).append();
- } catch (NoSuchTableException e) {
- throw new RuntimeException("Failed to write data", e);
- }
- }
-
- protected void createOrReplaceView(String name, String jsonData) {
- createOrReplaceView(name, null, jsonData);
- }
-
- protected void createOrReplaceView(String name, String schema, String jsonData) {
- Dataset<Row> ds = toDS(schema, jsonData);
- ds.createOrReplaceTempView(name);
- }
-
- protected <T> void createOrReplaceView(String name, List<T> data, Encoder<T> encoder) {
- spark.createDataset(data, encoder).createOrReplaceTempView(name);
- }
-
- private Dataset<Row> toDS(String schema, String jsonData) {
- List<String> jsonRows =
- Arrays.stream(jsonData.split("\n"))
- .filter(str -> !str.trim().isEmpty())
- .collect(Collectors.toList());
- Dataset<String> jsonDS = spark.createDataset(jsonRows, Encoders.STRING());
-
- if (schema != null) {
- return spark.read().schema(schema).json(jsonDS);
- } else {
- return spark.read().json(jsonDS);
- }
- }
-
- protected void validateDelete(
- Snapshot snapshot, String changedPartitionCount, String deletedDataFiles) {
- validateSnapshot(snapshot, DELETE, changedPartitionCount, deletedDataFiles, null, null);
- }
-
- protected void validateCopyOnWrite(
- Snapshot snapshot,
- String changedPartitionCount,
- String deletedDataFiles,
- String addedDataFiles) {
- validateSnapshot(
- snapshot, OVERWRITE, changedPartitionCount, deletedDataFiles, null, addedDataFiles);
- }
-
- protected void validateMergeOnRead(
- Snapshot snapshot,
- String changedPartitionCount,
- String addedDeleteFiles,
- String addedDataFiles) {
- validateSnapshot(
- snapshot, OVERWRITE, changedPartitionCount, null, addedDeleteFiles, addedDataFiles);
- }
-
- protected void validateSnapshot(
- Snapshot snapshot,
- String operation,
- String changedPartitionCount,
- String deletedDataFiles,
- String addedDeleteFiles,
- String addedDataFiles) {
- Assert.assertEquals("Operation must match", operation, snapshot.operation());
- validateProperty(snapshot, CHANGED_PARTITION_COUNT_PROP, changedPartitionCount);
- validateProperty(snapshot, DELETED_FILES_PROP, deletedDataFiles);
- validateProperty(snapshot, ADDED_DELETE_FILES_PROP, addedDeleteFiles);
- validateProperty(snapshot, ADDED_FILES_PROP, addedDataFiles);
- }
-
- protected void validateProperty(Snapshot snapshot, String property, Set<String> expectedValues) {
- String actual = snapshot.summary().get(property);
- Assert.assertTrue(
- "Snapshot property "
- + property
- + " has unexpected value, actual = "
- + actual
- + ", expected one of : "
- + String.join(",", expectedValues),
- expectedValues.contains(actual));
- }
-
- protected void validateProperty(Snapshot snapshot, String property, String expectedValue) {
- String actual = snapshot.summary().get(property);
- Assert.assertEquals(
- "Snapshot property " + property + " has unexpected value.", expectedValue, actual);
- }
-
- protected void sleep(long millis) {
- try {
- Thread.sleep(millis);
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- }
-
- protected DataFile writeDataFile(Table table, List<GenericRecord> records) {
- try {
- OutputFile file = Files.localOutput(temp.newFile());
-
- DataWriter<GenericRecord> dataWriter =
- Parquet.writeData(file)
- .forTable(table)
- .createWriterFunc(GenericParquetWriter::buildWriter)
- .overwrite()
- .build();
-
- try {
- for (GenericRecord record : records) {
- dataWriter.write(record);
- }
- } finally {
- dataWriter.close();
- }
-
- return dataWriter.toDataFile();
-
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-}
diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
deleted file mode 100644
index 10426ebb5594..000000000000
--- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java
+++ /dev/null
@@ -1,1136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.spark.extensions;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.DatumWriter;
-import org.apache.iceberg.AssertHelpers;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-import org.joda.time.DateTime;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Assume;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class TestAddFilesProcedure extends SparkExtensionsTestBase {
-
- private final String sourceTableName = "source_table";
- private File fileTableDir;
-
- public TestAddFilesProcedure(
- String catalogName, String implementation, Map<String, String> config) {
- super(catalogName, implementation, config);
- }
-
- @Rule public TemporaryFolder temp = new TemporaryFolder();
-
- @Before
- public void setupTempDirs() {
- try {
- fileTableDir = temp.newFolder();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- @After
- public void dropTables() {
- sql("DROP TABLE IF EXISTS %s", sourceTableName);
- sql("DROP TABLE IF EXISTS %s", tableName);
- }
-
- @Test
- public void addDataUnpartitioned() {
- createUnpartitionedFileTable("parquet");
-
- String createIceberg =
- "CREATE TABLE %s (id Integer, name String, dept String, subdept String) USING iceberg";
-
- sql(createIceberg, tableName);
-
- Object result =
- scalarSql(
- "CALL %s.system.add_files('%s', '`parquet`.`%s`')",
- catalogName, tableName, fileTableDir.getAbsolutePath());
-
- Assert.assertEquals(2L, result);
-
- assertEquals(
- "Iceberg table contains correct data",
- sql("SELECT * FROM %s ORDER BY id", sourceTableName),
- sql("SELECT * FROM %s ORDER BY id", tableName));
- }
-
- @Test
- public void deleteAndAddBackUnpartitioned() {
- createUnpartitionedFileTable("parquet");
-
- String createIceberg =
- "CREATE TABLE %s (id Integer, name String, dept String, subdept String) USING iceberg";
-
- sql(createIceberg, tableName);
-
- sql(
- "CALL %s.system.add_files('%s', '`parquet`.`%s`')",
- catalogName, tableName, fileTableDir.getAbsolutePath());
-
- String deleteData = "DELETE FROM %s";
- sql(deleteData, tableName);
-
- Object result =
- scalarSql(
- "CALL %s.system.add_files('%s', '`parquet`.`%s`')",
- catalogName, tableName, fileTableDir.getAbsolutePath());
- Assert.assertEquals(2L, result);
-
- assertEquals(
- "Iceberg table contains correct data",
- sql("SELECT * FROM %s ORDER BY id", sourceTableName),
- sql("SELECT * FROM %s ORDER BY id", tableName));
- }
-
- @Ignore // TODO Classpath issues prevent us from actually writing to a Spark ORC table
- public void addDataUnpartitionedOrc() {
- createUnpartitionedFileTable("orc");
-
- String createIceberg =
- "CREATE TABLE %s (id Integer, name String, dept String, subdept String) USING iceberg";
-
- sql(createIceberg, tableName);
-
- Object result =
- scalarSql(
- "CALL %s.system.add_files('%s', '`orc`.`%s`')",
- catalogName, tableName, fileTableDir.getAbsolutePath());
-
- Assert.assertEquals(2L, result);
-
- assertEquals(
- "Iceberg table contains correct data",
- sql("SELECT * FROM %s ORDER BY id", sourceTableName),
- sql("SELECT * FROM %s ORDER BY id", tableName));
- }
-
- @Test
- public void addAvroFile() throws Exception {
- // Spark Session Catalog cannot load metadata tables
- // with "The namespace in session catalog must have exactly one name part"
- Assume.assumeFalse(catalogName.equals("spark_catalog"));
-
- // Create an Avro file
-
- Schema schema =
- SchemaBuilder.record("record")
- .fields()
- .requiredInt("id")
- .requiredString("data")
- .endRecord();
- GenericRecord record1 = new GenericData.Record(schema);
- record1.put("id", 1L);
- record1.put("data", "a");
- GenericRecord record2 = new GenericData.Record(schema);
- record2.put("id", 2L);
- record2.put("data", "b");
- File outputFile = temp.newFile("test.avro");
-
- DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
- DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
- dataFileWriter.create(schema, outputFile);
- dataFileWriter.append(record1);
- dataFileWriter.append(record2);
- dataFileWriter.close();
-
- String createIceberg = "CREATE TABLE %s (id Long, data String) USING iceberg";
- sql(createIceberg, tableName);
-
- Object result =
- scalarSql(
- "CALL %s.system.add_files('%s', '`avro`.`%s`')",
- catalogName, tableName, outputFile.getPath());
- Assert.assertEquals(1L, result);
-
- List