From 2b4592c85e2f192619a53277f8af04ad603e5eee Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Tue, 15 Oct 2024 13:20:29 +0200 Subject: [PATCH] Core: Align CharSequenceSet impl with Data/DeleteFileSet --- .../apache/iceberg/util/CharSequenceSet.java | 178 +++--------------- .../iceberg/util/CharSequenceWrapper.java | 5 +- .../iceberg/util/TestCharSequenceSet.java | 175 ++++++++++++++++- 3 files changed, 197 insertions(+), 161 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java index cfdac0104c47..c13cbfd0cc28 100644 --- a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java +++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java @@ -18,183 +18,53 @@ */ package org.apache.iceberg.util; -import java.io.Serializable; -import java.util.Collection; -import java.util.Iterator; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Iterators; -import org.apache.iceberg.relocated.com.google.common.collect.Sets; -import org.apache.iceberg.relocated.com.google.common.collect.Streams; -public class CharSequenceSet implements Set, Serializable { +public class CharSequenceSet extends WrapperSet { private static final ThreadLocal WRAPPERS = ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null)); - public static CharSequenceSet of(Iterable charSequences) { - return new CharSequenceSet(charSequences); - } - - public static CharSequenceSet empty() { - return new CharSequenceSet(ImmutableList.of()); - } - - private final Set wrapperSet; - - private CharSequenceSet(Iterable charSequences) { - this.wrapperSet = - Sets.newHashSet(Iterables.transform(charSequences, CharSequenceWrapper::wrap)); + private CharSequenceSet() { + // needed for serialization } - @Override - public int size() { - return wrapperSet.size(); + private CharSequenceSet(Iterable charSequences) { + super( + Iterables.transform( + charSequences, + obj -> { + Preconditions.checkNotNull(obj, "Invalid object: null"); + return CharSequenceWrapper.wrap(obj); + })); } - @Override - public boolean isEmpty() { - return wrapperSet.isEmpty(); + public static CharSequenceSet of(Iterable charSequences) { + return new CharSequenceSet(charSequences); } - @Override - public boolean contains(Object obj) { - if (obj instanceof CharSequence) { - CharSequenceWrapper wrapper = WRAPPERS.get(); - boolean result = wrapperSet.contains(wrapper.set((CharSequence) obj)); - wrapper.set(null); // don't hold a reference to the value - return result; - } - return false; + public static CharSequenceSet empty() { + return new CharSequenceSet(); } @Override - public Iterator iterator() { - return Iterators.transform(wrapperSet.iterator(), CharSequenceWrapper::get); + protected Wrapper wrapper() { + return WRAPPERS.get(); } @Override - public Object[] toArray() { - return Iterators.toArray(iterator(), CharSequence.class); + protected Wrapper wrap(CharSequence file) { + return CharSequenceWrapper.wrap(file); } @Override - @SuppressWarnings("unchecked") - public T[] toArray(T[] destArray) { - int size = wrapperSet.size(); - if (destArray.length < size) { - return (T[]) toArray(); - } - - Iterator iter = iterator(); - int ind = 0; - while (iter.hasNext()) { - destArray[ind] = (T) iter.next(); - ind += 1; - } - - if (destArray.length > size) { - destArray[size] = null; - } - - return destArray; + protected Class elementClass() { + return CharSequence.class; } @Override public boolean add(CharSequence charSequence) { - return wrapperSet.add(CharSequenceWrapper.wrap(charSequence)); - } - - @Override - public boolean remove(Object obj) { - if (obj instanceof CharSequence) { - CharSequenceWrapper wrapper = WRAPPERS.get(); - boolean result = wrapperSet.remove(wrapper.set((CharSequence) obj)); - wrapper.set(null); // don't hold a reference to the value - return result; - } - return false; - } - - @Override - @SuppressWarnings("CollectionUndefinedEquality") - public boolean containsAll(Collection objects) { - if (objects != null) { - return Iterables.all(objects, this::contains); - } - return false; - } - - @Override - public boolean addAll(Collection charSequences) { - if (charSequences != null) { - return Iterables.addAll( - wrapperSet, Iterables.transform(charSequences, CharSequenceWrapper::wrap)); - } - return false; - } - - @Override - public boolean retainAll(Collection objects) { - if (objects != null) { - Set toRetain = - objects.stream() - .filter(CharSequence.class::isInstance) - .map(CharSequence.class::cast) - .map(CharSequenceWrapper::wrap) - .collect(Collectors.toSet()); - - return Iterables.retainAll(wrapperSet, toRetain); - } - - return false; - } - - @Override - @SuppressWarnings("CollectionUndefinedEquality") - public boolean removeAll(Collection objects) { - if (objects != null) { - return objects.stream().filter(this::remove).count() != 0; - } - - return false; - } - - @Override - public void clear() { - wrapperSet.clear(); - } - - @SuppressWarnings("CollectionUndefinedEquality") - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } else if (!(other instanceof Set)) { - return false; - } - - Set that = (Set) other; - - if (size() != that.size()) { - return false; - } - - try { - return containsAll(that); - } catch (ClassCastException | NullPointerException unused) { - return false; - } - } - - @Override - public int hashCode() { - return wrapperSet.stream().mapToInt(CharSequenceWrapper::hashCode).sum(); - } - - @Override - public String toString() { - return Streams.stream(iterator()).collect(Collectors.joining("CharSequenceSet({", ", ", "})")); + // method is needed to not break API compatibility + return super.add(charSequence); } } diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java index 854264c1ae21..59e8eb712dc6 100644 --- a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java +++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java @@ -18,12 +18,11 @@ */ package org.apache.iceberg.util; -import java.io.Serializable; import org.apache.iceberg.types.Comparators; import org.apache.iceberg.types.JavaHashes; /** Wrapper class to adapt CharSequence for use in maps and sets. */ -public class CharSequenceWrapper implements CharSequence, Serializable { +public class CharSequenceWrapper implements CharSequence, WrapperSet.Wrapper { public static CharSequenceWrapper wrap(CharSequence seq) { return new CharSequenceWrapper(seq); } @@ -39,6 +38,7 @@ private CharSequenceWrapper(CharSequence wrapped) { this.wrapped = wrapped; } + @Override public CharSequenceWrapper set(CharSequence newWrapped) { this.wrapped = newWrapped; this.hashCode = 0; @@ -46,6 +46,7 @@ public CharSequenceWrapper set(CharSequence newWrapped) { return this; } + @Override public CharSequence get() { return wrapped; } diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java index 324742c07a2d..093d2a0c6b87 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java +++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java @@ -19,10 +19,12 @@ package org.apache.iceberg.util; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.util.Arrays; import java.util.Collections; import java.util.Set; +import org.apache.iceberg.TestHelpers; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.junit.jupiter.api.Test; @@ -42,15 +44,115 @@ public void testSearchingInCharSequenceCollection() { @Test public void nullString() { - assertThat(CharSequenceSet.of(Arrays.asList((String) null))).contains((String) null); + assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList((String) null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); assertThat(CharSequenceSet.empty()).doesNotContain((String) null); } + @Test + public void emptySet() { + assertThat(CharSequenceSet.empty()).isEmpty(); + assertThat(CharSequenceSet.empty()).doesNotContain("a", "b", "c"); + } + + @Test + public void insertionOrderIsMaintained() { + CharSequenceSet set = CharSequenceSet.empty(); + set.addAll(ImmutableList.of("d", "a", "c")); + set.add("b"); + set.add("d"); + + assertThat(set).hasSize(4).containsExactly("d", "a", "c", "b"); + } + + @Test + public void clear() { + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("a", "b")); + set.clear(); + assertThat(set).isEmpty(); + } + + @Test + public void addAll() { + CharSequenceSet empty = CharSequenceSet.empty(); + assertThatThrownBy(() -> empty.add(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.addAll(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid collection: null"); + + assertThatThrownBy(() -> empty.addAll(Collections.singletonList(null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.addAll(Arrays.asList("a", null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + CharSequenceSet set = CharSequenceSet.empty(); + set.addAll(ImmutableList.of("b", "a", "c", "a")); + assertThat(set).hasSize(3).containsExactly("b", "a", "c"); + } + + @Test + public void contains() { + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a")); + assertThatThrownBy(() -> set.contains(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThat(set).hasSize(2).containsExactly("b", "a").doesNotContain("c").doesNotContain("d"); + + assertThatThrownBy(() -> CharSequenceSet.of(Arrays.asList("c", "b", null, "a"))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + } + + @Test + public void containsAll() { + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a")); + assertThatThrownBy(() -> set.containsAll(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid collection: null"); + + assertThatThrownBy(() -> set.containsAll(Collections.singletonList(null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> set.containsAll(Arrays.asList("a", null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThat(set.containsAll(ImmutableList.of("a", "b"))).isTrue(); + assertThat(set.containsAll(ImmutableList.of("b", "a", "c"))).isFalse(); + assertThat(set.containsAll(ImmutableList.of("b"))).isTrue(); + } + @Test public void testRetainAll() { + CharSequenceSet empty = CharSequenceSet.empty(); + assertThatThrownBy(() -> empty.retainAll(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid collection: null"); + + assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.retainAll(Arrays.asList("123", null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.retainAll(ImmutableList.of("456", "789", 123))) + .isInstanceOf(ClassCastException.class) + .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence"); + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456")); - assertThat(set.retainAll(ImmutableList.of("456", "789", 123))) + assertThat(set.retainAll(ImmutableList.of("456", "789", "555"))) .overridingErrorMessage("Set should be changed") .isTrue(); @@ -61,24 +163,74 @@ public void testRetainAll() { .overridingErrorMessage("Set should not be changed") .isFalse(); - assertThat(set.retainAll(ImmutableList.of(123, 456))) + assertThat(set.retainAll(ImmutableList.of("555", "789"))) .overridingErrorMessage("Set should be changed") .isTrue(); assertThat(set).isEmpty(); } + @Test + public void toArray() { + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("b", "a")); + assertThat(set.toArray()).hasSize(2).containsExactly("b", "a"); + + CharSequence[] array = new CharSequence[1]; + assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a"); + + array = new CharSequence[0]; + assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a"); + + array = new CharSequence[5]; + assertThat(set.toArray(array)).hasSize(5).containsExactly("b", "a", null, null, null); + + array = new CharSequence[2]; + assertThat(set.toArray(array)).hasSize(2).containsExactly("b", "a"); + } + + @Test + public void remove() { + CharSequenceSet set = CharSequenceSet.of(ImmutableSet.of("a", "b", "c")); + assertThatThrownBy(() -> set.remove(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + set.remove("a"); + assertThat(set).containsExactly("b", "c"); + set.remove("b"); + assertThat(set).containsExactly("c"); + set.remove("c"); + assertThat(set).isEmpty(); + } + @Test public void testRemoveAll() { + CharSequenceSet empty = CharSequenceSet.empty(); + assertThatThrownBy(() -> empty.removeAll(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid collection: null"); + + assertThatThrownBy(() -> empty.removeAll(Collections.singletonList(null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.removeAll(Arrays.asList("123", null))) + .isInstanceOf(NullPointerException.class) + .hasMessage("Invalid object: null"); + + assertThatThrownBy(() -> empty.removeAll(ImmutableList.of("123", 456))) + .isInstanceOf(ClassCastException.class) + .hasMessage("Cannot cast java.lang.Integer to java.lang.CharSequence"); + CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456")); - assertThat(set.removeAll(ImmutableList.of("456", "789", 123))) + assertThat(set.removeAll(ImmutableList.of("456", "789"))) .overridingErrorMessage("Set should be changed") .isTrue(); assertThat(set).hasSize(1).contains("123"); set = CharSequenceSet.of(ImmutableList.of("123", "456")); - assertThat(set.removeAll(ImmutableList.of(123, 456))) + assertThat(set.removeAll(ImmutableList.of("333", "789"))) .overridingErrorMessage("Set should not be changed") .isFalse(); @@ -119,4 +271,17 @@ public void testEqualsAndHashCode() { .isEqualTo(set3.hashCode()) .isEqualTo(set4.hashCode()); } + + @Test + public void kryoSerialization() throws Exception { + CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", "b", "a")); + assertThat(TestHelpers.KryoHelpers.roundTripSerialize(charSequences)).isEqualTo(charSequences); + } + + @Test + public void javaSerialization() throws Exception { + CharSequenceSet charSequences = CharSequenceSet.of(ImmutableList.of("c", "b", "a")); + CharSequenceSet deserialize = TestHelpers.deserialize(TestHelpers.serialize(charSequences)); + assertThat(deserialize).isEqualTo(charSequences); + } }