From 9c6bcf1c732a02a79143e723dae0c9fc708df36a Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Tue, 19 Dec 2023 16:44:17 +0100 Subject: [PATCH] Spark: Throw exception on `ALTER VIEW AS ` `ALTER VIEW AS ` doesn't allow to preserve column aliases and column comment when the underlying query is changed, which can lead to unexpected behavior. For now it's better to use `CREATE OR REPLACE VIEW` as that is more explicit when the schema of the view is defined with column aliases/comments. --- .../analysis/RewriteViewCommands.scala | 5 ++ .../iceberg/spark/extensions/TestViews.java | 67 ++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala index 066ba59394d7..32578b933f64 100644 --- a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala +++ b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteViewCommands.scala @@ -22,12 +22,14 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.AlterViewAs import org.apache.spark.sql.catalyst.plans.logical.CreateView import org.apache.spark.sql.catalyst.plans.logical.DropView import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.View import org.apache.spark.sql.catalyst.plans.logical.views.CreateIcebergView import org.apache.spark.sql.catalyst.plans.logical.views.DropIcebergView +import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogPlugin @@ -60,6 +62,9 @@ case class RewriteViewCommands(spark: SparkSession) extends Rule[LogicalPlan] wi properties = properties, allowExisting = allowExisting, replace = replace) + + case AlterViewAs(ResolvedV2View(_, _), _, _) => + throw new AnalysisException("ALTER VIEW AS is not supported. Use CREATE OR REPLACE VIEW instead") } private def isTempView(nameParts: Seq[String]): Boolean = { diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestViews.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestViews.java index bf6509afee77..360ce3d9bbc2 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestViews.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestViews.java @@ -25,7 +25,6 @@ import java.util.List; import java.util.Map; import java.util.Random; -import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.iceberg.IcebergBuild; @@ -1149,10 +1148,74 @@ public void createViewWithSubqueryExpressionUsingGlobalTempView() { "because it references to the temporary object global_temp.%s", globalTempView)); } + @Test + public void createOrReplaceViewWithColumnAliases() throws NoSuchTableException { + insertRows(6); + String viewName = viewName("viewWithColumnAliases"); + + sql( + "CREATE VIEW %s (new_id COMMENT 'ID', new_data COMMENT 'DATA') AS SELECT id, data FROM %s WHERE id <= 3", + viewName, tableName); + + View view = viewCatalog().loadView(TableIdentifier.of(NAMESPACE, viewName)); + assertThat(view.properties()).containsEntry("queryColumnNames", "id,data"); + + assertThat(view.schema().columns()).hasSize(2); + Types.NestedField first = view.schema().columns().get(0); + assertThat(first.name()).isEqualTo("new_id"); + assertThat(first.doc()).isEqualTo("ID"); + + Types.NestedField second = view.schema().columns().get(1); + assertThat(second.name()).isEqualTo("new_data"); + assertThat(second.doc()).isEqualTo("DATA"); + + assertThat(sql("SELECT new_id FROM %s", viewName)) + .hasSize(3) + .containsExactlyInAnyOrder(row(1), row(2), row(3)); + + sql( + "CREATE OR REPLACE VIEW %s (data2 COMMENT 'new data', id2 COMMENT 'new ID') AS SELECT data, id FROM %s WHERE id <= 3", + viewName, tableName); + + assertThat(sql("SELECT data2, id2 FROM %s", viewName)) + .hasSize(3) + .containsExactlyInAnyOrder(row("2", 1), row("4", 2), row("6", 3)); + + view = viewCatalog().loadView(TableIdentifier.of(NAMESPACE, viewName)); + assertThat(view.properties()).containsEntry("queryColumnNames", "data,id"); + + assertThat(view.schema().columns()).hasSize(2); + first = view.schema().columns().get(0); + assertThat(first.name()).isEqualTo("data2"); + assertThat(first.doc()).isEqualTo("new data"); + + second = view.schema().columns().get(1); + assertThat(second.name()).isEqualTo("id2"); + assertThat(second.doc()).isEqualTo("new ID"); + } + + @Test + public void alterViewIsNotSupported() throws NoSuchTableException { + insertRows(6); + String viewName = "alteredView"; + + sql("CREATE VIEW %s AS SELECT id, data FROM %s WHERE id <= 3", viewName, tableName); + + assertThat(sql("SELECT id FROM %s", viewName)) + .hasSize(3) + .containsExactlyInAnyOrder(row(1), row(2), row(3)); + + assertThatThrownBy( + () -> sql("ALTER VIEW %s AS SELECT id FROM %s WHERE id > 3", viewName, tableName)) + .isInstanceOf(AnalysisException.class) + .hasMessageContaining( + "ALTER VIEW AS is not supported. Use CREATE OR REPLACE VIEW instead"); + } + private void insertRows(int numRows) throws NoSuchTableException { List records = Lists.newArrayListWithCapacity(numRows); for (int i = 1; i <= numRows; i++) { - records.add(new SimpleRecord(i, UUID.randomUUID().toString())); + records.add(new SimpleRecord(i, Integer.toString(i * 2))); } Dataset df = spark.createDataFrame(records, SimpleRecord.class);