Skip to content

Commit

Permalink
Fix handling of top level fields
Browse files Browse the repository at this point in the history
  • Loading branch information
RussellSpitzer committed Oct 6, 2023
1 parent fef689d commit b27db5a
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.UpdateSchema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand All @@ -29,9 +28,9 @@

/**
* Visitor class that accumulates the set of changes needed to evolve an existing schema into the
* union of the existing and a new schema. If all old elements of the schema are present in the
* new schema in the same order as the existing schema, places new elements in the same places as
* they exist in the new schema. Changes are added to an {@link UpdateSchema} operation.
* union of the existing and a new schema. If all old elements of the schema are present in the new
* schema in the same order as the existing schema, places new elements in the same places as they
* exist in the new schema. Changes are added to an {@link UpdateSchema} operation.
*/
public class UnionByNameVisitor extends SchemaWithPartnerVisitor<Integer, Boolean> {

Expand Down Expand Up @@ -102,9 +101,9 @@ public Boolean struct(
} else {
lastPartnerField += 1;
Types.NestedField nestedField =
caseSensitive
? partnerStruct.field(field.name())
: partnerStruct.caseInsensitiveField(field.name());
caseSensitive
? partnerStruct.field(field.name())
: partnerStruct.caseInsensitiveField(field.name());
updateColumn(field, nestedField);
}
}
Expand Down Expand Up @@ -164,10 +163,12 @@ private Type findFieldType(int fieldId) {
}
}

private boolean nonMissingElementsInOrder(Types.StructType oldStruct, Types.StructType newStruct) {
private boolean nonMissingElementsInOrder(
Types.StructType oldStruct, Types.StructType newStruct) {
List<String> oldFields =
oldStruct.fields().stream().map(Types.NestedField::name).collect(Collectors.toList());
List<String> newFields = newStruct.fields().stream().map(Types.NestedField::name).collect(Collectors.toList());
List<String> newFields =
newStruct.fields().stream().map(Types.NestedField::name).collect(Collectors.toList());

newFields.retainAll(oldFields);

Expand All @@ -182,7 +183,11 @@ private void addColumnBefore(int parentId, Types.NestedField field, Types.Nested
String parentName = partnerSchema.findColumnName(parentId);
api.addColumn(parentName, field.name(), field.type(), field.doc());
if (nextField != null) {
api.moveBefore(parentName + "." + field.name(), parentName + "." + nextField.name());
if (parentName != null) {
api.moveBefore(parentName + "." + field.name(), parentName + "." + nextField.name());
} else {
api.moveBefore(field.name(), nextField.name());
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,49 +581,78 @@ public void testAppendNestedLists() {

@Test
public void testInsertNestedPrimitiveIntoMiddleOfStruct() {
Schema currentSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
optional(2, "string", Types.StringType.get()),
optional(3, "long", Types.StringType.get()),
optional( 4, "substruct", Types.StructType.of(
optional(5, "deepint", Types.IntegerType.get()))))));

Schema newSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
optional(2, "string", Types.StringType.get()),
optional(8, "string2", Types.StringType.get()),
optional(9, "string3", Types.StringType.get()),
optional(3, "long", Types.StringType.get()),
optional( 4, "substruct", Types.StructType.of(
optional( 6, "firstnewdeepInt", Types.IntegerType.get()),
optional( 7, "newdeepInt", Types.IntegerType.get()),
optional(5, "deepint", Types.IntegerType.get()))),
optional(10, "lastint", Types.IntegerType.get()))));
Schema currentSchema =
new Schema(
optional(
1,
"aStruct",
Types.StructType.of(
optional(2, "string", Types.StringType.get()),
optional(3, "long", Types.StringType.get()),
optional(
4,
"substruct",
Types.StructType.of(optional(5, "deepint", Types.IntegerType.get()))))));

Schema newSchema =
new Schema(
optional(11, "newfirstColumn", Types.IntegerType.get()),
optional(
1,
"aStruct",
Types.StructType.of(
optional(2, "string", Types.StringType.get()),
optional(8, "string2", Types.StringType.get()),
optional(9, "string3", Types.StringType.get()),
optional(3, "long", Types.StringType.get()),
optional(
4,
"substruct",
Types.StructType.of(
optional(6, "firstnewdeepInt", Types.IntegerType.get()),
optional(7, "newdeepInt", Types.IntegerType.get()),
optional(5, "deepint", Types.IntegerType.get()))),
optional(10, "lastint", Types.IntegerType.get()))));

Schema applied = new SchemaUpdate(currentSchema, 5).unionByNameWith(newSchema).apply();
Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
}

@Test
public void testInsertNestedPrimitiveOriginalFieldsOOOrder() {
Schema currentSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
optional(2, "field1", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional( 4, "field3", Types.StringType.get()))));

Schema newSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
optional(5, "field4", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional(4, "field3", Types.StringType.get()),
optional(2, "field1", Types.StringType.get()))));
Schema currentSchema =
new Schema(
optional(
1,
"aStruct",
Types.StructType.of(
optional(2, "field1", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional(4, "field3", Types.StringType.get()))));

Schema expectedSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
optional(2, "field1", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional(4, "field3", Types.StringType.get()),
optional(5, "field4", Types.StringType.get()))));
Schema newSchema =
new Schema(
optional(
1,
"aStruct",
Types.StructType.of(
optional(5, "field4", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional(4, "field3", Types.StringType.get()),
optional(2, "field1", Types.StringType.get()))));

Schema expectedSchema =
new Schema(
optional(
1,
"aStruct",
Types.StructType.of(
optional(2, "field1", Types.StringType.get()),
optional(3, "field2", Types.StringType.get()),
optional(4, "field3", Types.StringType.get()),
optional(5, "field4", Types.StringType.get()))));

Schema applied = new SchemaUpdate(currentSchema, 4).unionByNameWith(newSchema).apply();
Assert.assertEquals(expectedSchema.asStruct(), applied.asStruct());
}

}

0 comments on commit b27db5a

Please sign in to comment.