Add example with rename column

In Iceberg, tables are projected using field IDs rather than column names. Renaming a column is a metadata-only change (Iceberg is lazy, so the existing data files are not rewritten), so reading the renamed column should still resolve to the original column in those files.
Eventual-Inc · Jan 31, 2024 · de34439 · de34439
1 parent 6cda37a
commit de34439
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 5 deletions.
diff --git a/tests/integration/iceberg/docker-compose/provision.py b/tests/integration/iceberg/docker-compose/provision.py
@@ -325,7 +325,7 @@
 
 spark.sql(
     """
-  CREATE OR REPLACE TABLE default.add_new_column
+  CREATE OR REPLACE TABLE default.test_add_new_column
   USING iceberg
   AS SELECT
         1            AS idx
@@ -336,5 +336,38 @@
 """
 )
 
-spark.sql("ALTER TABLE default.add_new_column ADD COLUMN name STRING")
-spark.sql("INSERT INTO default.add_new_column VALUES (3, 'abc'), (4, 'def')")
+spark.sql("ALTER TABLE default.test_add_new_column ADD COLUMN name STRING")
+spark.sql("INSERT INTO default.test_add_new_column VALUES (3, 'abc'), (4, 'def')")
+
+# In Iceberg, the data and the schema evolve independently. We can add a column
+# that shows up when querying the table but is not yet present in any Parquet file.
+
+spark.sql(
+    """
+  CREATE OR REPLACE TABLE default.test_new_column_with_no_data
+  USING iceberg
+  AS SELECT
+        1            AS idx
+    UNION ALL SELECT
+        2            AS idx
+    UNION ALL SELECT
+        3            AS idx
+"""
+)
+
+spark.sql("ALTER TABLE default.test_new_column_with_no_data ADD COLUMN name STRING")
+
+spark.sql(
+    """
+  CREATE OR REPLACE TABLE default.test_table_rename
+  USING iceberg
+  AS SELECT
+        1            AS idx
+    UNION ALL SELECT
+        2            AS idx
+    UNION ALL SELECT
+        3            AS idx
+"""
+)
+
+spark.sql("ALTER TABLE default.test_table_rename RENAME COLUMN idx TO pos")
diff --git a/tests/integration/iceberg/test_table_load.py b/tests/integration/iceberg/test_table_load.py
@@ -21,7 +21,7 @@ def test_daft_iceberg_table_open(local_iceberg_tables):
 
 
 WORKING_SHOW_COLLECT = [
-    # "test_all_types", # ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet
+    "test_all_types",  # NOTE(review): was disabled for "ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet" - confirm this now passes
     "test_limit",
     "test_null_nan",
     "test_null_nan_rewritten",
@@ -37,7 +37,9 @@ def test_daft_iceberg_table_open(local_iceberg_tables):
     # "test_table_sanitized_character", # Bug in scan().to_arrow().to_arrow()
     "test_table_version",  # we have bugs when loading no files
     "test_uuid_and_fixed_unpartitioned",
-    "add_new_column",
+    "test_add_new_column",
+    "test_new_column_with_no_data",
+    "test_table_rename",
 ]