From de34439f6ca43a62646e26f92e63440e1edcefd7 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Wed, 31 Jan 2024 20:31:49 +0100
Subject: [PATCH] Add example with rename column

In Iceberg the tables are projected using field-IDs. Even if the column
is renamed (and Iceberg is lazy, so the table is not rewritten), it
should still read the original column.
---
 .../iceberg/docker-compose/provision.py      | 39 +++++++++++++++++-
 tests/integration/iceberg/test_table_load.py |  6 ++-
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/tests/integration/iceberg/docker-compose/provision.py b/tests/integration/iceberg/docker-compose/provision.py
index cce6362fdc..5ab42108b6 100644
--- a/tests/integration/iceberg/docker-compose/provision.py
+++ b/tests/integration/iceberg/docker-compose/provision.py
@@ -325,7 +325,7 @@

 spark.sql(
     """
-    CREATE OR REPLACE TABLE default.add_new_column
+    CREATE OR REPLACE TABLE default.test_add_new_column
     USING iceberg
     AS SELECT
     1 AS idx
@@ -336,5 +336,38 @@
 """
 )

-spark.sql("ALTER TABLE default.add_new_column ADD COLUMN name STRING")
-spark.sql("INSERT INTO default.add_new_column VALUES (3, 'abc'), (4, 'def')")
+spark.sql("ALTER TABLE default.test_add_new_column ADD COLUMN name STRING")
+spark.sql("INSERT INTO default.test_add_new_column VALUES (3, 'abc'), (4, 'def')")
+
+# In Iceberg the data and schema evolve independently. We can add a column
+# that should show up when querying the data, but is not yet represented in a Parquet file
+
+spark.sql(
+    """
+    CREATE OR REPLACE TABLE default.test_new_column_with_no_data
+    USING iceberg
+    AS SELECT
+    1 AS idx
+    UNION ALL SELECT
+    2 AS idx
+    UNION ALL SELECT
+    3 AS idx
+"""
+)
+
+spark.sql("ALTER TABLE default.test_new_column_with_no_data ADD COLUMN name STRING")
+
+spark.sql(
+    """
+    CREATE OR REPLACE TABLE default.test_table_rename
+    USING iceberg
+    AS SELECT
+    1 AS idx
+    UNION ALL SELECT
+    2 AS idx
+    UNION ALL SELECT
+    3 AS idx
+"""
+)
+
+spark.sql("ALTER TABLE default.test_table_rename RENAME COLUMN idx TO pos")
diff --git a/tests/integration/iceberg/test_table_load.py b/tests/integration/iceberg/test_table_load.py
index 4b7692e3c4..5c3616e67d 100644
--- a/tests/integration/iceberg/test_table_load.py
+++ b/tests/integration/iceberg/test_table_load.py
@@ -21,7 +21,7 @@ def test_daft_iceberg_table_open(local_iceberg_tables):


 WORKING_SHOW_COLLECT = [
-    # "test_all_types",  # ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet
+    "test_all_types",  # ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet
     "test_limit",
     "test_null_nan",
     "test_null_nan_rewritten",
@@ -37,7 +37,9 @@ def test_daft_iceberg_table_open(local_iceberg_tables):

     # "test_table_sanitized_character",  # Bug in scan().to_arrow().to_arrow()
     "test_table_version",  # we have bugs when loading no files
     "test_uuid_and_fixed_unpartitioned",
-    "add_new_column",
+    "test_add_new_column",
+    "test_new_column_with_no_data",
+    "test_table_rename",
 ]