Skip to content

Commit

Permalink
Add example with rename column
Browse files Browse the repository at this point in the history
In Iceberg, table columns are projected using field IDs.
Even if a column is renamed (Iceberg is lazy, so the
table is not rewritten), a read should still resolve to
the original column.
  • Loading branch information
Fokko committed Jan 31, 2024
1 parent 6cda37a commit de34439
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 5 deletions.
39 changes: 36 additions & 3 deletions tests/integration/iceberg/docker-compose/provision.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@

spark.sql(
"""
CREATE OR REPLACE TABLE default.add_new_column
CREATE OR REPLACE TABLE default.test_add_new_column
USING iceberg
AS SELECT
1 AS idx
Expand All @@ -336,5 +336,38 @@
"""
)

spark.sql("ALTER TABLE default.add_new_column ADD COLUMN name STRING")
spark.sql("INSERT INTO default.add_new_column VALUES (3, 'abc'), (4, 'def')")
spark.sql("ALTER TABLE default.test_add_new_column ADD COLUMN name STRING")
spark.sql("INSERT INTO default.test_add_new_column VALUES (3, 'abc'), (4, 'def')")

# In Iceberg the data and schema evolve independently. We can add a column
# that should show up when querying the data, but is not yet represented in a Parquet file.
# No rows are inserted after the ALTER below, so the data written by the
# CREATE ... AS SELECT only contains `idx`.

spark.sql(
"""
CREATE OR REPLACE TABLE default.test_new_column_with_no_data
USING iceberg
AS SELECT
1 AS idx
UNION ALL SELECT
2 AS idx
UNION ALL SELECT
3 AS idx
"""
)

spark.sql("ALTER TABLE default.test_new_column_with_no_data ADD COLUMN name STRING")

# Iceberg projects columns by field ID rather than by name. The table is
# written with a column called `idx`, which is then renamed to `pos`. The
# rename is lazy (the table is not rewritten), so a reader should still
# resolve `pos` to the data originally written as `idx`.

spark.sql(
"""
CREATE OR REPLACE TABLE default.test_table_rename
USING iceberg
AS SELECT
1 AS idx
UNION ALL SELECT
2 AS idx
UNION ALL SELECT
3 AS idx
"""
)

spark.sql("ALTER TABLE default.test_table_rename RENAME COLUMN idx TO pos")
6 changes: 4 additions & 2 deletions tests/integration/iceberg/test_table_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_daft_iceberg_table_open(local_iceberg_tables):


WORKING_SHOW_COLLECT = [
# "test_all_types", # ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet
"test_all_types", # ValueError: DaftError::ArrowError Not yet implemented: Deserializing type Decimal(10, 2) from parquet
"test_limit",
"test_null_nan",
"test_null_nan_rewritten",
Expand All @@ -37,7 +37,9 @@ def test_daft_iceberg_table_open(local_iceberg_tables):
# "test_table_sanitized_character", # Bug in scan().to_arrow().to_arrow()
"test_table_version", # we have bugs when loading no files
"test_uuid_and_fixed_unpartitioned",
"add_new_column",
"test_add_new_column",
"test_new_column_with_no_data",
"test_table_rename",
]


Expand Down

0 comments on commit de34439

Please sign in to comment.