From 9aeea63f8b74a6e0a468baaf7c705c19df0ebfb7 Mon Sep 17 00:00:00 2001 From: Nan Zhu Date: Fri, 22 Sep 2023 08:32:38 -0700 Subject: [PATCH] Core: Move a column with the same name as a deleted column (#8325) * allow move a column naming the same as a deleted column * refactor * stylistic fixes * fix code style * fix toString of Move * comments * fix the compilation * fix merge errors --- .../java/org/apache/iceberg/SchemaUpdate.java | 17 +- .../org/apache/iceberg/TestSchemaUpdate.java | 179 +++++++++++++++++- 2 files changed, 194 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index c4f879924379..069097778606 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -387,11 +387,17 @@ public UpdateSchema caseSensitive(boolean caseSensitivity) { } private Integer findForMove(String name) { + Integer addedId = addedNameToId.get(name); + if (addedId != null) { + return addedId; + } + Types.NestedField field = findField(name); if (field != null) { return field.fieldId(); } - return addedNameToId.get(name); + + return null; } private void internalMove(String name, Move move) { @@ -790,6 +796,15 @@ private enum MoveType { AFTER } + @Override + public String toString() { + String suffix = ""; + if (type != MoveType.FIRST) { + suffix = " field " + referenceFieldId; + } + return "Move column " + fieldId + " " + type.toString() + suffix; + } + static Move first(int fieldId) { return new Move(fieldId, -1, MoveType.FIRST); } diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 04cb64403523..1d903dfbb1a5 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -303,7 +303,7 @@ public void testUpdateFailure() { continue; } - String typeChange = fromType.toString() + " -> " + toType.toString(); + String typeChange = fromType + " -> " + toType.toString(); Assertions.assertThatThrownBy( () -> new SchemaUpdate(fromSchema, 1).updateColumn("col", toType)) .isInstanceOf(IllegalArgumentException.class) @@ -2067,4 +2067,181 @@ public void testMoveIdentifierFieldsCaseInsensitive() { Sets.newHashSet(SCHEMA.findField("id").fieldId()), newSchema.identifierFieldIds()); } + + @Test + public void testMoveTopDeletedColumnAfterAnotherColumn() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "data_1", Types.StringType.get())); + Schema expected = + new Schema( + required(2, "data", Types.StringType.get()), + required(4, "id", Types.IntegerType.get()), + required(3, "data_1", Types.StringType.get())); + + Schema actual = + new SchemaUpdate(schema, 3) + .allowIncompatibleChanges() + .deleteColumn("id") + .addRequiredColumn("id", Types.IntegerType.get()) + .moveAfter("id", "data") + .apply(); + Assert.assertEquals( + "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + } + + @Test + public void testMoveTopDeletedColumnBeforeAnotherColumn() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "data_1", Types.StringType.get())); + Schema expected = + new Schema( + required(2, "data", Types.StringType.get()), + required(4, "id", Types.IntegerType.get()), + required(3, "data_1", Types.StringType.get())); + + Schema actual = + new SchemaUpdate(schema, 3) + .allowIncompatibleChanges() + .deleteColumn("id") + .addRequiredColumn("id", Types.IntegerType.get()) + .moveBefore("id", "data_1") + .apply(); + Assert.assertEquals( + "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + } + + @Test + public void testMoveTopDeletedColumnToFirst() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "data_1", Types.StringType.get())); + Schema expected = + new Schema( + required(4, "id", Types.IntegerType.get()), + required(2, "data", Types.StringType.get()), + required(3, "data_1", Types.StringType.get())); + + Schema actual = + new SchemaUpdate(schema, 3) + .allowIncompatibleChanges() + .deleteColumn("id") + .addRequiredColumn("id", Types.IntegerType.get()) + .moveFirst("id") + .apply(); + Assert.assertEquals( + "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + } + + @Test + public void testMoveDeletedNestedStructFieldAfterAnotherColumn() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(3, "count", Types.LongType.get()), + required(4, "data", Types.StringType.get()), + required(5, "data_1", Types.StringType.get())))); + Schema expected = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(3, "count", Types.LongType.get()), + required(6, "data", Types.IntegerType.get()), + required(5, "data_1", Types.StringType.get())))); + + Schema actual = + new SchemaUpdate(schema, 5) + .allowIncompatibleChanges() + .deleteColumn("struct.data") + .addRequiredColumn("struct", "data", Types.IntegerType.get()) + .moveAfter("struct.data", "struct.count") + .apply(); + + Assert.assertEquals( + "Should move deleted nested column correctly", expected.asStruct(), actual.asStruct()); + } + + @Test + public void testMoveDeletedNestedStructFieldBeforeAnotherColumn() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(3, "count", Types.LongType.get()), + required(4, "data", Types.StringType.get()), + required(5, "data_1", Types.StringType.get())))); + Schema expected = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(3, "count", Types.LongType.get()), + required(6, "data", Types.IntegerType.get()), + required(5, "data_1", Types.StringType.get())))); + + Schema actual = + new SchemaUpdate(schema, 5) + .allowIncompatibleChanges() + .deleteColumn("struct.data") + .addRequiredColumn("struct", "data", Types.IntegerType.get()) + .moveBefore("struct.data", "struct.data_1") + .apply(); + + Assert.assertEquals( + "Should move deleted nested column correctly", expected.asStruct(), actual.asStruct()); + } + + @Test + public void testMoveDeletedNestedStructFieldToFirst() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(3, "count", Types.LongType.get()), + required(4, "data", Types.StringType.get()), + required(5, "data_1", Types.StringType.get())))); + Schema expected = + new Schema( + required(1, "id", Types.LongType.get()), + required( + 2, + "struct", + Types.StructType.of( + required(6, "data", Types.IntegerType.get()), + required(3, "count", Types.LongType.get()), + required(5, "data_1", Types.StringType.get())))); + + Schema actual = + new SchemaUpdate(schema, 5) + .allowIncompatibleChanges() + .deleteColumn("struct.data") + .addRequiredColumn("struct", "data", Types.IntegerType.get()) + .moveFirst("struct.data") + .apply(); + + Assert.assertEquals( + "Should move deleted nested column correctly", expected.asStruct(), actual.asStruct()); + } }