Skip to content

Commit

Permalink
Make schema case-insensitivity fail when the schema has conflicti…
Browse files Browse the repository at this point in the history
…ng column names
  • Loading branch information
sl255051 committed Aug 1, 2024
1 parent 5308d39 commit b817cd4
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 1 deletion.
11 changes: 10 additions & 1 deletion api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,16 @@ public static Map<String, Integer> indexByLowerCaseName(Types.StructType struct)
Map<String, Integer> indexByLowerCaseName = Maps.newHashMap();
indexByName(struct)
.forEach(
(name, integer) -> indexByLowerCaseName.put(name.toLowerCase(Locale.ROOT), integer));
(name, integer) -> {
String normalizedName = name.toLowerCase(Locale.ROOT);
if (indexByLowerCaseName.containsKey(normalizedName)) {
throw new IllegalArgumentException(
String.format(
"Schema does not support case-insensitivity; duplicate column name found in schema: %s and %s",
name, struct.toString()));
}
indexByLowerCaseName.put(normalizedName, integer);
});
return indexByLowerCaseName;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException;

import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;

/**
 * Exercises case-sensitive and case-insensitive field lookup on {@link Schema}, including a schema
 * whose column names differ only in letter case.
 */
public class TestSchemaCaseSensitivity {

  /** Builds a schema whose columns {@code data} and {@code DATA} differ only in letter case. */
  private static Schema schemaWithCaseConflict() {
    return new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "data", Types.StringType.get()),
        required(3, "DATA", Types.StringType.get()));
  }

  /**
   * A case-insensitive lookup on a schema with columns that collide once lower-cased is expected
   * to be rejected with an {@link IllegalArgumentException}.
   */
  @Test
  public void
      testSchemaWithColumnNamesThatDifferOnlyInLetterCaseThrowsOnCaseInsensitiveFindField() {
    Schema conflicting = schemaWithCaseConflict();

    // Only the message prefix is pinned; the remainder of the message is not checked here.
    assertThatIllegalArgumentException()
        .isThrownBy(() -> conflicting.caseInsensitiveFindField("DATA"))
        .withMessageStartingWith(
            "Schema does not support case-insensitivity; duplicate column name found in schema:");
  }

  /**
   * A case-sensitive lookup on the same kind of schema is expected to resolve each spelling to its
   * own field.
   */
  @Test
  public void
      testSchemaWithColumnNamesThatDifferOnlyInLetterCaseSucceedsOnCaseSensitiveFindField() {
    Schema conflicting = schemaWithCaseConflict();

    Types.NestedField lowerCaseField = conflicting.findField("data");
    Types.NestedField expectedLowerCase = required(2, "data", Types.StringType.get());
    assertThat(lowerCaseField).isEqualTo(expectedLowerCase);

    Types.NestedField upperCaseField = conflicting.findField("DATA");
    Types.NestedField expectedUpperCase = required(3, "DATA", Types.StringType.get());
    assertThat(upperCaseField).isEqualTo(expectedUpperCase);
  }

  /**
   * With no colliding column names, a case-insensitive lookup using a differently-cased name is
   * expected to find the matching field.
   */
  @Test
  public void testCaseInsensitiveFindFieldSucceeds() {
    Schema unambiguous =
        new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "data", Types.StringType.get()));

    Types.NestedField found = unambiguous.caseInsensitiveFindField("Data");
    Types.NestedField expected = required(2, "data", Types.StringType.get());
    assertThat(found).isEqualTo(expected);
  }
}

0 comments on commit b817cd4

Please sign in to comment.