From e16bfcffc9c1d57bbe331aa9d545919711a9dd6f Mon Sep 17 00:00:00 2001
From: Eduard Tudenhoefner
Date: Mon, 8 Jan 2024 21:14:06 +0100
Subject: [PATCH] Core: Add JUnit5 version of TableTestBase (#9424)

---
 build.gradle                                  |   1 +
 .../java/org/apache/iceberg/TestBase.java     | 754 ++++++++++++++++++
 .../iceberg/TestCreateSnapshotEvent.java      | 105 +--
 .../apache/iceberg/TestManifestReader.java    |  88 +-
 .../io/TestGenericSortedPosDeleteWriter.java  |  89 +--
 5 files changed, 869 insertions(+), 168 deletions(-)
 create mode 100644 core/src/test/java/org/apache/iceberg/TestBase.java

diff --git a/build.gradle b/build.gradle
index 0f1363db4fcc..952081c4ce42 100644
--- a/build.gradle
+++ b/build.gradle
@@ -407,6 +407,7 @@ project(':iceberg-data') {
   }
 
   test {
+    useJUnitPlatform()
     // Only for TestSplitScan as of Gradle 5.0+
     maxHeapSize '1500m'
   }

diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java
new file mode 100644
index 000000000000..6fc048ded85b
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestBase.java
@@ -0,0 +1,754 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import org.apache.iceberg.deletes.PositionDelete;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.relocated.com.google.common.io.Files;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestBase {
+  // Schema passed to create tables
+  public static final Schema SCHEMA =
+      new Schema(
+          required(3, "id", Types.IntegerType.get()), required(4, "data", Types.StringType.get()));
+
+  protected static final int BUCKETS_NUMBER = 16;
+
+  // Partition spec used to create tables
+  public static final PartitionSpec SPEC =
+      PartitionSpec.builderFor(SCHEMA).bucket("data", BUCKETS_NUMBER).build();
+
+  static final DataFile FILE_A =
+      DataFiles.builder(SPEC)
+          .withPath("/path/to/data-a.parquet")
+          .withFileSizeInBytes(10)
+          .withPartitionPath("data_bucket=0") // easy way to set partition data for now
+          .withRecordCount(1)
+          .build();
+  static final DataFile FILE_A2 =
+      DataFiles.builder(SPEC)
+          .withPath("/path/to/data-a-2.parquet")
+          .withFileSizeInBytes(10)
+          .withPartitionPath("data_bucket=0") // easy way to set partition data for now
+          .withRecordCount(1)
+          .build();
+  static final DeleteFile FILE_A_DELETES =
+      FileMetadata.deleteFileBuilder(SPEC)
+          .ofPositionDeletes()
+          .withPath("/path/to/data-a-deletes.parquet")
+          .withFileSizeInBytes(10)
+          .withPartitionPath("data_bucket=0") // easy way to set partition data for now
+          .withRecordCount(1)
+          .build();
+  // Equality delete files.
+ static final DeleteFile FILE_A2_DELETES = + FileMetadata.deleteFileBuilder(SPEC) + .ofEqualityDeletes(1) + .withPath("/path/to/data-a2-deletes.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=0") + .withRecordCount(1) + .build(); + static final DataFile FILE_B = + DataFiles.builder(SPEC) + .withPath("/path/to/data-b.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=1") // easy way to set partition data for now + .withRecordCount(1) + .withSplitOffsets(ImmutableList.of(1L)) + .build(); + static final DeleteFile FILE_B_DELETES = + FileMetadata.deleteFileBuilder(SPEC) + .ofPositionDeletes() + .withPath("/path/to/data-b-deletes.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=1") // easy way to set partition data for now + .withRecordCount(1) + .build(); + static final DataFile FILE_C = + DataFiles.builder(SPEC) + .withPath("/path/to/data-c.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=2") // easy way to set partition data for now + .withRecordCount(1) + .withSplitOffsets(ImmutableList.of(2L, 8L)) + .build(); + static final DeleteFile FILE_C2_DELETES = + FileMetadata.deleteFileBuilder(SPEC) + .ofEqualityDeletes(1) + .withPath("/path/to/data-c-deletes.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=2") // easy way to set partition data for now + .withRecordCount(1) + .build(); + static final DataFile FILE_D = + DataFiles.builder(SPEC) + .withPath("/path/to/data-d.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=3") // easy way to set partition data for now + .withRecordCount(1) + .withSplitOffsets(ImmutableList.of(0L, 3L, 6L)) + .build(); + static final DeleteFile FILE_D2_DELETES = + FileMetadata.deleteFileBuilder(SPEC) + .ofEqualityDeletes(1) + .withPath("/path/to/data-d-deletes.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=3") // easy way to set partition data for now + .withRecordCount(1) + .build(); + static final DataFile FILE_WITH_STATS = + DataFiles.builder(SPEC) + .withPath("/path/to/data-with-stats.parquet") + .withMetrics( + new Metrics( + 10L, + ImmutableMap.of(3, 100L, 4, 200L), // column sizes + ImmutableMap.of(3, 90L, 4, 180L), // value counts + ImmutableMap.of(3, 10L, 4, 20L), // null value counts + ImmutableMap.of(3, 0L, 4, 0L), // nan value counts + ImmutableMap.of( + 3, + Conversions.toByteBuffer(Types.IntegerType.get(), 1), + 4, + Conversions.toByteBuffer(Types.IntegerType.get(), 2)), // lower bounds + ImmutableMap.of( + 3, + Conversions.toByteBuffer(Types.IntegerType.get(), 5), + 4, + Conversions.toByteBuffer(Types.IntegerType.get(), 10)) // upperbounds + )) + .withFileSizeInBytes(350) + .build(); + + static final FileIO FILE_IO = new TestTables.LocalFileIO(); + + @TempDir protected Path temp; + + @TempDir protected File tableDir = null; + protected File metadataDir = null; + public TestTables.TestTable table = null; + + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(new Object[] {1}, new Object[] {2}); + } + + @Parameter protected int formatVersion; + + @SuppressWarnings("checkstyle:MemberName") + protected TableAssertions V1Assert; + + @SuppressWarnings("checkstyle:MemberName") + protected TableAssertions V2Assert; + + @BeforeEach + public void setupTable() throws Exception { + this.V1Assert = new TableAssertions(1, formatVersion); + this.V2Assert = new TableAssertions(2, formatVersion); + this.metadataDir = new File(tableDir, "metadata"); + this.table = 
create(SCHEMA, SPEC); + } + + @AfterEach + public void cleanupTables() { + TestTables.clearTables(); + } + + List listManifestFiles() { + return listManifestFiles(tableDir); + } + + List listManifestFiles(File tableDirToList) { + return Lists.newArrayList( + new File(tableDirToList, "metadata") + .listFiles( + (dir, name) -> + !name.startsWith("snap") + && Files.getFileExtension(name).equalsIgnoreCase("avro"))); + } + + public static long countAllMetadataFiles(File tableDir) { + return Arrays.stream(new File(tableDir, "metadata").listFiles()) + .filter(f -> f.isFile()) + .count(); + } + + protected TestTables.TestTable create(Schema schema, PartitionSpec spec) { + return TestTables.create(tableDir, "test", schema, spec, formatVersion); + } + + TestTables.TestTable load() { + return TestTables.load(tableDir, "test"); + } + + Integer version() { + return TestTables.metadataVersion("test"); + } + + public TableMetadata readMetadata() { + return TestTables.readMetadata("test"); + } + + ManifestFile writeManifest(DataFile... files) throws IOException { + return writeManifest(null, files); + } + + ManifestFile writeManifest(Long snapshotId, DataFile... files) throws IOException { + File manifestFile = temp.resolve("input.m0.avro").toFile(); + assertThat(manifestFile).doesNotExist(); + OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); + + ManifestWriter writer = + ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId); + try { + for (DataFile file : files) { + writer.add(file); + } + } finally { + writer.close(); + } + + return writer.toManifestFile(); + } + + ManifestFile writeManifest(String fileName, ManifestEntry... entries) throws IOException { + return writeManifest(null, fileName, entries); + } + + ManifestFile writeManifest(Long snapshotId, ManifestEntry... entries) throws IOException { + return writeManifest(snapshotId, "input.m0.avro", entries); + } + + @SuppressWarnings("unchecked") + > ManifestFile writeManifest( + Long snapshotId, String fileName, ManifestEntry... entries) throws IOException { + File manifestFile = temp.resolve(fileName).toFile(); + assertThat(manifestFile).doesNotExist(); + OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); + + ManifestWriter writer; + if (entries[0].file() instanceof DataFile) { + writer = + (ManifestWriter) + ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId); + } else { + writer = + (ManifestWriter) + ManifestFiles.writeDeleteManifest( + formatVersion, table.spec(), outputFile, snapshotId); + } + try { + for (ManifestEntry entry : entries) { + writer.addEntry((ManifestEntry) entry); + } + } finally { + writer.close(); + } + + return writer.toManifestFile(); + } + + ManifestFile writeDeleteManifest(int newFormatVersion, Long snapshotId, DeleteFile... deleteFiles) + throws IOException { + OutputFile manifestFile = + org.apache.iceberg.Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("junit", null, temp.toFile()).toString())); + ManifestWriter writer = + ManifestFiles.writeDeleteManifest(newFormatVersion, SPEC, manifestFile, snapshotId); + try { + for (DeleteFile deleteFile : deleteFiles) { + writer.add(deleteFile); + } + } finally { + writer.close(); + } + return writer.toManifestFile(); + } + + ManifestFile writeManifestWithName(String name, DataFile... 
files) throws IOException { + File manifestFile = temp.resolve(name + ".avro").toFile(); + assertThat(manifestFile).doesNotExist(); + OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); + + ManifestWriter writer = + ManifestFiles.write(formatVersion, table.spec(), outputFile, null); + try { + for (DataFile file : files) { + writer.add(file); + } + } finally { + writer.close(); + } + + return writer.toManifestFile(); + } + + > ManifestEntry manifestEntry( + ManifestEntry.Status status, Long snapshotId, F file) { + return manifestEntry(status, snapshotId, 0L, 0L, file); + } + + > ManifestEntry manifestEntry( + ManifestEntry.Status status, + Long snapshotId, + Long dataSequenceNumber, + Long fileSequenceNumber, + F file) { + + GenericManifestEntry entry = new GenericManifestEntry<>(table.spec().partitionType()); + switch (status) { + case ADDED: + if (dataSequenceNumber != null && dataSequenceNumber != 0) { + return entry.wrapAppend(snapshotId, dataSequenceNumber, file); + } else { + return entry.wrapAppend(snapshotId, file); + } + case EXISTING: + return entry.wrapExisting(snapshotId, dataSequenceNumber, fileSequenceNumber, file); + case DELETED: + return entry.wrapDelete(snapshotId, dataSequenceNumber, fileSequenceNumber, file); + default: + throw new IllegalArgumentException("Unexpected entry status: " + status); + } + } + + void validateSnapshot(Snapshot old, Snapshot snap, DataFile... newFiles) { + validateSnapshot(old, snap, null, newFiles); + } + + void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile... newFiles) { + validateSnapshot(old, snap, (Long) sequenceNumber, newFiles); + } + + @SuppressWarnings("checkstyle:HiddenField") + Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { + Snapshot snapshot; + if (branch.equals(SnapshotRef.MAIN_BRANCH)) { + snapshotUpdate.commit(); + snapshot = table.currentSnapshot(); + } else { + ((SnapshotProducer) snapshotUpdate.toBranch(branch)).commit(); + snapshot = table.snapshot(branch); + } + + return snapshot; + } + + Snapshot apply(SnapshotUpdate snapshotUpdate, String branch) { + if (branch.equals(SnapshotRef.MAIN_BRANCH)) { + return ((SnapshotProducer) snapshotUpdate).apply(); + } else { + return ((SnapshotProducer) snapshotUpdate.toBranch(branch)).apply(); + } + } + + void validateSnapshot(Snapshot old, Snapshot snap, Long sequenceNumber, DataFile... newFiles) { + assertThat(old != null ? Sets.newHashSet(old.deleteManifests(FILE_IO)) : ImmutableSet.of()) + .as("Should not change delete manifests") + .isEqualTo(Sets.newHashSet(snap.deleteManifests(FILE_IO))); + List oldManifests = old != null ? 
old.dataManifests(FILE_IO) : ImmutableList.of(); + + // copy the manifests to a modifiable list and remove the existing manifests + List newManifests = Lists.newArrayList(snap.dataManifests(FILE_IO)); + for (ManifestFile oldManifest : oldManifests) { + assertThat(newManifests.remove(oldManifest)) + .as("New snapshot should contain old manifests") + .isTrue(); + } + + assertThat(newManifests).as("Should create 1 new manifest and reuse old manifests").hasSize(1); + ManifestFile manifest = newManifests.get(0); + + long id = snap.snapshotId(); + Iterator newPaths = paths(newFiles).iterator(); + + for (ManifestEntry entry : ManifestFiles.read(manifest, FILE_IO).entries()) { + DataFile file = entry.file(); + if (sequenceNumber != null) { + V1Assert.assertEquals( + "Data sequence number should default to 0", 0, entry.dataSequenceNumber().longValue()); + V1Assert.assertEquals( + "Data sequence number should default to 0", + 0, + entry.file().dataSequenceNumber().longValue()); + + V2Assert.assertEquals( + "Data sequence number should match expected", + sequenceNumber, + entry.dataSequenceNumber()); + V2Assert.assertEquals( + "Data sequence number should match expected", + sequenceNumber, + entry.file().dataSequenceNumber()); + V2Assert.assertEquals( + "Sequence number should match expected", + snap.sequenceNumber(), + entry.dataSequenceNumber().longValue()); + + V2Assert.assertEquals( + "File sequence number should match expected", + sequenceNumber, + entry.file().fileSequenceNumber()); + V2Assert.assertEquals( + "File sequence number should match expected", + snap.sequenceNumber(), + entry.file().fileSequenceNumber().longValue()); + } + assertThat(file.path().toString()) + .as("Path should match expected") + .isEqualTo(newPaths.next()); + assertThat(entry.snapshotId()).as("File's snapshot ID should match").isEqualTo(id); + } + + assertThat(newPaths.hasNext()).as("Should find all files in the manifest").isFalse(); + + assertThat(snap.schemaId()).as("Schema ID should match").isEqualTo(table.schema().schemaId()); + } + + void validateTableFiles(Table tbl, DataFile... expectedFiles) { + Set expectedFilePaths = Sets.newHashSet(); + for (DataFile file : expectedFiles) { + expectedFilePaths.add(file.path()); + } + Set actualFilePaths = Sets.newHashSet(); + for (FileScanTask task : tbl.newScan().planFiles()) { + actualFilePaths.add(task.file().path()); + } + assertThat(actualFilePaths).as("Files should match").isEqualTo(expectedFilePaths); + } + + void validateBranchFiles(Table tbl, String ref, DataFile... expectedFiles) { + Set expectedFilePaths = Sets.newHashSet(); + for (DataFile file : expectedFiles) { + expectedFilePaths.add(file.path()); + } + Set actualFilePaths = Sets.newHashSet(); + for (FileScanTask task : tbl.newScan().useRef(ref).planFiles()) { + actualFilePaths.add(task.file().path()); + } + assertThat(actualFilePaths).as("Files should match").isEqualTo(expectedFilePaths); + } + + void validateBranchDeleteFiles(Table tbl, String branch, DeleteFile... expectedFiles) { + Set expectedFilePaths = Sets.newHashSet(); + for (DeleteFile file : expectedFiles) { + expectedFilePaths.add(file.path()); + } + Set actualFilePaths = Sets.newHashSet(); + for (FileScanTask task : tbl.newScan().useRef(branch).planFiles()) { + for (DeleteFile file : task.deletes()) { + actualFilePaths.add(file.path()); + } + } + assertThat(actualFilePaths).as("Delete files should match").isEqualTo(expectedFilePaths); + } + + List paths(DataFile... 
dataFiles) { + List paths = Lists.newArrayListWithExpectedSize(dataFiles.length); + for (DataFile file : dataFiles) { + paths.add(file.path().toString()); + } + return paths; + } + + void validateManifest( + ManifestFile manifest, Iterator ids, Iterator expectedFiles) { + validateManifest(manifest, null, null, ids, expectedFiles, null); + } + + void validateManifest( + ManifestFile manifest, + Iterator dataSeqs, + Iterator fileSeqs, + Iterator ids, + Iterator expectedFiles) { + validateManifest(manifest, dataSeqs, fileSeqs, ids, expectedFiles, null); + } + + void validateManifest( + ManifestFile manifest, + Iterator dataSeqs, + Iterator fileSeqs, + Iterator ids, + Iterator expectedFiles, + Iterator statuses) { + for (ManifestEntry entry : ManifestFiles.read(manifest, FILE_IO).entries()) { + DataFile file = entry.file(); + DataFile expected = expectedFiles.next(); + + validateManifestSequenceNumbers(entry, dataSeqs, fileSeqs); + + assertThat(file.path().toString()) + .as("Path should match expected") + .isEqualTo(expected.path().toString()); + assertThat(entry.snapshotId()) + .as("Snapshot ID should match expected ID") + .isEqualTo(ids.next()); + if (statuses != null) { + assertThat(entry.status()).as("Status should match expected").isEqualTo(statuses.next()); + } + } + + assertThat(expectedFiles).as("Should find all files in the manifest").isExhausted(); + } + + void validateDeleteManifest( + ManifestFile manifest, + Iterator dataSeqs, + Iterator fileSeqs, + Iterator ids, + Iterator expectedFiles, + Iterator statuses) { + for (ManifestEntry entry : + ManifestFiles.readDeleteManifest(manifest, FILE_IO, null).entries()) { + DeleteFile file = entry.file(); + DeleteFile expected = expectedFiles.next(); + + validateManifestSequenceNumbers(entry, dataSeqs, fileSeqs); + + assertThat(file.path().toString()) + .as("Path should match expected") + .isEqualTo(expected.path().toString()); + assertThat(entry.snapshotId()) + .as("Snapshot ID should match expected ID") + .isEqualTo(ids.next()); + assertThat(entry.status()).as("Status should match expected").isEqualTo(statuses.next()); + } + + assertThat(expectedFiles).as("Should find all files in the manifest").isExhausted(); + } + + private > void validateManifestSequenceNumbers( + ManifestEntry entry, Iterator dataSeqs, Iterator fileSeqs) { + if (dataSeqs != null) { + V1Assert.assertEquals( + "Data sequence number should default to 0", 0, entry.dataSequenceNumber().longValue()); + V1Assert.assertEquals( + "Data sequence number should default to 0", + 0, + entry.file().dataSequenceNumber().longValue()); + + Long expectedSequenceNumber = dataSeqs.next(); + V2Assert.assertEquals( + "Data sequence number should match expected", + expectedSequenceNumber, + entry.dataSequenceNumber()); + V2Assert.assertEquals( + "Data sequence number should match expected", + expectedSequenceNumber, + entry.file().dataSequenceNumber()); + } + + if (fileSeqs != null) { + V1Assert.assertEquals( + "File sequence number should default to 0", (Long) 0L, entry.fileSequenceNumber()); + V1Assert.assertEquals( + "File sequence number should default to 0", (Long) 0L, entry.file().fileSequenceNumber()); + + Long expectedFileSequenceNumber = fileSeqs.next(); + V2Assert.assertEquals( + "File sequence number should match", + expectedFileSequenceNumber, + entry.fileSequenceNumber()); + V2Assert.assertEquals( + "File sequence number should match", + expectedFileSequenceNumber, + entry.file().fileSequenceNumber()); + } + } + + protected DataFile newDataFile(String partitionPath) { + return 
DataFiles.builder(table.spec()) + .withPath("/path/to/data-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(10) + .withPartitionPath(partitionPath) + .withRecordCount(1) + .build(); + } + + protected DeleteFile newDeleteFile(int specId, String partitionPath) { + PartitionSpec spec = table.specs().get(specId); + return FileMetadata.deleteFileBuilder(spec) + .ofPositionDeletes() + .withPath("/path/to/delete-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(10) + .withPartitionPath(partitionPath) + .withRecordCount(1) + .build(); + } + + protected DeleteFile newEqualityDeleteFile(int specId, String partitionPath, int... fieldIds) { + PartitionSpec spec = table.specs().get(specId); + return FileMetadata.deleteFileBuilder(spec) + .ofEqualityDeletes(fieldIds) + .withPath("/path/to/delete-" + UUID.randomUUID() + ".parquet") + .withFileSizeInBytes(10) + .withPartitionPath(partitionPath) + .withRecordCount(1) + .build(); + } + + protected PositionDelete positionDelete(CharSequence path, long pos, T row) { + PositionDelete positionDelete = PositionDelete.create(); + return positionDelete.set(path, pos, row); + } + + protected void withUnavailableLocations(Iterable locations, Action action) { + for (String location : locations) { + move(location, location + "_temp"); + } + + try { + action.invoke(); + } finally { + for (String location : locations) { + move(location + "_temp", location); + } + } + } + + private void move(String location, String newLocation) { + Path path = Paths.get(location); + Path tempPath = Paths.get(newLocation); + + try { + java.nio.file.Files.move(path, tempPath); + } catch (IOException e) { + throw new UncheckedIOException("Failed to move: " + location, e); + } + } + + static void validateManifestEntries( + ManifestFile manifest, + Iterator ids, + Iterator expectedFiles, + Iterator expectedStatuses) { + for (ManifestEntry entry : ManifestFiles.read(manifest, FILE_IO).entries()) { + DataFile file = entry.file(); + DataFile expected = expectedFiles.next(); + final ManifestEntry.Status expectedStatus = expectedStatuses.next(); + assertThat(file.path().toString()) + .as("Path should match expected") + .isEqualTo(expected.path().toString()); + assertThat(entry.snapshotId()) + .as("Snapshot ID should match expected ID") + .isEqualTo(ids.next()); + assertThat(entry.status()).as("Status should match expected").isEqualTo(expectedStatus); + } + + assertThat(expectedFiles).as("Should find all files in the manifest").isExhausted(); + } + + static Iterator statuses(ManifestEntry.Status... statuses) { + return Iterators.forArray(statuses); + } + + static Iterator dataSeqs(Long... seqs) { + return Iterators.forArray(seqs); + } + + static Iterator fileSeqs(Long... seqs) { + return Iterators.forArray(seqs); + } + + static Iterator ids(Long... ids) { + return Iterators.forArray(ids); + } + + static Iterator files(DataFile... files) { + return Iterators.forArray(files); + } + + static Iterator files(DeleteFile... files) { + return Iterators.forArray(files); + } + + static Iterator files(ManifestFile manifest) { + return ManifestFiles.read(manifest, FILE_IO).iterator(); + } + + /** Used for assertions that only apply if the table version is v2. 
*/ + protected static class TableAssertions { + private boolean enabled; + + private TableAssertions(int validForVersion, int formatVersion) { + this.enabled = validForVersion == formatVersion; + } + + void disable() { + this.enabled = false; + } + + void enable() { + this.enabled = true; + } + + void assertEquals(String context, int expected, int actual) { + if (enabled) { + assertThat(actual).as(context).isEqualTo(expected); + } + } + + void assertEquals(String context, long expected, long actual) { + if (enabled) { + assertThat(actual).as(context).isEqualTo(expected); + } + } + + void assertEquals(String context, Object expected, Object actual) { + if (enabled) { + assertThat(actual).as(context).isEqualTo(expected); + } + } + } + + @FunctionalInterface + protected interface Action { + void invoke(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestCreateSnapshotEvent.java b/core/src/test/java/org/apache/iceberg/TestCreateSnapshotEvent.java index 2c9580bb842c..e2fa80707a2f 100644 --- a/core/src/test/java/org/apache/iceberg/TestCreateSnapshotEvent.java +++ b/core/src/test/java/org/apache/iceberg/TestCreateSnapshotEvent.java @@ -18,98 +18,63 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; + import org.apache.iceberg.events.CreateSnapshotEvent; import org.apache.iceberg.events.Listener; import org.apache.iceberg.events.Listeners; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; -@RunWith(Parameterized.class) -public class TestCreateSnapshotEvent extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } +public class TestCreateSnapshotEvent extends TestBase { private CreateSnapshotEvent currentEvent; - public TestCreateSnapshotEvent(int formatVersion) { - super(formatVersion); + @BeforeEach + public void initListener() { Listeners.register(new MyListener(), CreateSnapshotEvent.class); } - @Test + @TestTemplate public void testAppendCommitEvent() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).as("Table should start empty").isEmpty(); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertNotNull(currentEvent); - Assert.assertEquals( - "Added records in the table should be 1", "1", currentEvent.summary().get("added-records")); - Assert.assertEquals( - "Added files in the table should be 1", - "1", - currentEvent.summary().get("added-data-files")); - Assert.assertEquals( - "Total records in the table should be 1", "1", currentEvent.summary().get("total-records")); - Assert.assertEquals( - "Total data files in the table should be 1", - "1", - currentEvent.summary().get("total-data-files")); + assertThat(currentEvent).isNotNull(); + assertThat(currentEvent.summary()) + .containsEntry("added-records", "1") + .containsEntry("added-data-files", "1") + .containsEntry("total-records", "1") + .containsEntry("total-data-files", "1"); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertNotNull(currentEvent); - Assert.assertEquals( - "Added records in the table should be 1", "1", currentEvent.summary().get("added-records")); - Assert.assertEquals( - "Added files in the table should be 1", - "1", - currentEvent.summary().get("added-data-files")); - Assert.assertEquals( - "Total records in the table should be 
2", "2", currentEvent.summary().get("total-records")); - Assert.assertEquals( - "Total data files in the table should be 2", - "2", - currentEvent.summary().get("total-data-files")); + assertThat(currentEvent).isNotNull(); + assertThat(currentEvent.summary()) + .containsEntry("added-records", "1") + .containsEntry("added-data-files", "1") + .containsEntry("total-records", "2") + .containsEntry("total-data-files", "2"); } - @Test + @TestTemplate public void testAppendAndDeleteCommitEvent() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).as("Table should start empty").isEmpty(); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertNotNull(currentEvent); - Assert.assertEquals( - "Added records in the table should be 2", "2", currentEvent.summary().get("added-records")); - Assert.assertEquals( - "Added files in the table should be 2", - "2", - currentEvent.summary().get("added-data-files")); - Assert.assertEquals( - "Total records in the table should be 2", "2", currentEvent.summary().get("total-records")); - Assert.assertEquals( - "Total data files in the table should be 2", - "2", - currentEvent.summary().get("total-data-files")); + assertThat(currentEvent).as("Current event should not be null").isNotNull(); + assertThat(currentEvent.summary()) + .containsEntry("added-records", "2") + .containsEntry("added-data-files", "2") + .containsEntry("total-records", "2") + .containsEntry("total-data-files", "2"); table.newDelete().deleteFile(FILE_A).commit(); - Assert.assertNotNull(currentEvent); - Assert.assertEquals( - "Deleted records in the table should be 1", - "1", - currentEvent.summary().get("deleted-records")); - Assert.assertEquals( - "Deleted files in the table should be 1", - "1", - currentEvent.summary().get("deleted-data-files")); - Assert.assertEquals( - "Total records in the table should be 1", "1", currentEvent.summary().get("total-records")); - Assert.assertEquals( - "Total data files in the table should be 1", - "1", - currentEvent.summary().get("total-data-files")); + assertThat(currentEvent).as("Current event should not be null after delete").isNotNull(); + assertThat(currentEvent.summary()) + .containsEntry("deleted-records", "1") + .containsEntry("deleted-data-files", "1") + .containsEntry("total-records", "1") + .containsEntry("total-data-files", "1"); } class MyListener implements Listener { diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReader.java b/core/src/test/java/org/apache/iceberg/TestManifestReader.java index 3bb9bf1a4c1d..44b09081d7a3 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestReader.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestReader.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.IOException; import java.util.List; @@ -32,18 +33,9 @@ import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; import org.assertj.core.api.recursive.comparison.RecursiveComparisonConfiguration; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestManifestReader extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } +import 
org.junit.jupiter.api.TestTemplate; + +public class TestManifestReader extends TestBase { private static final RecursiveComparisonConfiguration FILE_COMPARISON_CONFIG = RecursiveComparisonConfiguration.builder() @@ -51,22 +43,18 @@ public static Object[] parameters() { "dataSequenceNumber", "fileOrdinal", "fileSequenceNumber", "fromProjectionPos") .build(); - public TestManifestReader(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testManifestReaderWithEmptyInheritableMetadata() throws IOException { ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 1000L, FILE_A)); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { ManifestEntry entry = Iterables.getOnlyElement(reader.entries()); - Assert.assertEquals(Status.EXISTING, entry.status()); - Assert.assertEquals(FILE_A.path(), entry.file().path()); - Assert.assertEquals(1000L, (long) entry.snapshotId()); + assertThat(entry.status()).isEqualTo(Status.EXISTING); + assertThat(entry.file().path()).isEqualTo(FILE_A.path()); + assertThat(entry.snapshotId()).isEqualTo(1000L); } } - @Test + @TestTemplate public void testReaderWithFilterWithoutSelect() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE_A, FILE_B, FILE_C); try (ManifestReader reader = @@ -82,7 +70,7 @@ public void testReaderWithFilterWithoutSelect() throws IOException { } } - @Test + @TestTemplate public void testInvalidUsage() throws IOException { ManifestFile manifest = writeManifest(FILE_A, FILE_B); Assertions.assertThatThrownBy(() -> ManifestFiles.read(manifest, FILE_IO)) @@ -90,23 +78,23 @@ public void testInvalidUsage() throws IOException { .hasMessage("Cannot read from ManifestFile with null (unassigned) snapshot ID"); } - @Test + @TestTemplate public void testManifestReaderWithPartitionMetadata() throws IOException { ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, FILE_A)); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { ManifestEntry entry = Iterables.getOnlyElement(reader.entries()); - Assert.assertEquals(123L, (long) entry.snapshotId()); + assertThat(entry.snapshotId()).isEqualTo(123L); List fields = ((PartitionData) entry.file().partition()).getPartitionType().fields(); - Assert.assertEquals(1, fields.size()); - Assert.assertEquals(1000, fields.get(0).fieldId()); - Assert.assertEquals("data_bucket", fields.get(0).name()); - Assert.assertEquals(Types.IntegerType.get(), fields.get(0).type()); + assertThat(fields).hasSize(1); + assertThat(fields.get(0).fieldId()).isEqualTo(1000); + assertThat(fields.get(0).name()).isEqualTo("data_bucket"); + assertThat(fields.get(0).type()).isEqualTo(Types.IntegerType.get()); } } - @Test + @TestTemplate public void testManifestReaderWithUpdatedPartitionMetadataForV1Table() throws IOException { PartitionSpec spec = PartitionSpec.builderFor(table.schema()).bucket("id", 8).bucket("data", 16).build(); @@ -115,53 +103,55 @@ public void testManifestReaderWithUpdatedPartitionMetadataForV1Table() throws IO ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, FILE_A)); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { ManifestEntry entry = Iterables.getOnlyElement(reader.entries()); - Assert.assertEquals(123L, (long) entry.snapshotId()); + assertThat(entry.snapshotId()).isEqualTo(123L); List fields = ((PartitionData) entry.file().partition()).getPartitionType().fields(); - Assert.assertEquals(2, fields.size()); - Assert.assertEquals(1000, 
fields.get(0).fieldId()); - Assert.assertEquals("id_bucket", fields.get(0).name()); - Assert.assertEquals(Types.IntegerType.get(), fields.get(0).type()); - - Assert.assertEquals(1001, fields.get(1).fieldId()); - Assert.assertEquals("data_bucket", fields.get(1).name()); - Assert.assertEquals(Types.IntegerType.get(), fields.get(1).type()); + assertThat(fields).hasSize(2); + assertThat(fields.get(0).fieldId()).isEqualTo(1000); + assertThat(fields.get(0).name()).isEqualTo("id_bucket"); + assertThat(fields.get(0).type()).isEqualTo(Types.IntegerType.get()); + + assertThat(fields.get(1).fieldId()).isEqualTo(1001); + assertThat(fields.get(1).name()).isEqualTo("data_bucket"); + assertThat(fields.get(1).type()).isEqualTo(Types.IntegerType.get()); } } - @Test + @TestTemplate public void testDataFilePositions() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE_A, FILE_B, FILE_C); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { long expectedPos = 0L; for (DataFile file : reader) { - Assert.assertEquals("Position should match", (Long) expectedPos, file.pos()); - Assert.assertEquals( - "Position from field index should match", expectedPos, ((BaseFile) file).get(17)); + assertThat(file.pos()).as("Position should match").isEqualTo(expectedPos); + assertThat(((BaseFile) file).get(17)) + .as("Position from field index should match") + .isEqualTo(expectedPos); expectedPos += 1; } } } - @Test + @TestTemplate public void testDeleteFilePositions() throws IOException { - Assume.assumeTrue("Delete files only work for format version 2", formatVersion == 2); + assumeThat(formatVersion).as("Delete files only work for format version 2").isEqualTo(2); ManifestFile manifest = writeDeleteManifest(formatVersion, 1000L, FILE_A_DELETES, FILE_B_DELETES); try (ManifestReader reader = ManifestFiles.readDeleteManifest(manifest, FILE_IO, null)) { long expectedPos = 0L; for (DeleteFile file : reader) { - Assert.assertEquals("Position should match", (Long) expectedPos, file.pos()); - Assert.assertEquals( - "Position from field index should match", expectedPos, ((BaseFile) file).get(17)); + assertThat(file.pos()).as("Position should match").isEqualTo(expectedPos); + assertThat(((BaseFile) file).get(17)) + .as("Position from field index should match") + .isEqualTo(expectedPos); expectedPos += 1; } } } - @Test + @TestTemplate public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException { DataFile invalidOffset = DataFiles.builder(SPEC) @@ -173,7 +163,7 @@ public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException { ManifestFile manifest = writeManifest(1000L, invalidOffset); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { DataFile file = Iterables.getOnlyElement(reader); - Assertions.assertThat(file.splitOffsets()).isNull(); + assertThat(file.splitOffsets()).isNull(); } } } diff --git a/data/src/test/java/org/apache/iceberg/io/TestGenericSortedPosDeleteWriter.java b/data/src/test/java/org/apache/iceberg/io/TestGenericSortedPosDeleteWriter.java index 7aabb9d2eba0..a725a40aba15 100644 --- a/data/src/test/java/org/apache/iceberg/io/TestGenericSortedPosDeleteWriter.java +++ b/data/src/test/java/org/apache/iceberg/io/TestGenericSortedPosDeleteWriter.java @@ -18,17 +18,23 @@ */ package org.apache.iceberg.io; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.List; import 
org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Files; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.Parameters; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.RowDelta; import org.apache.iceberg.Schema; -import org.apache.iceberg.TableTestBase; +import org.apache.iceberg.TestBase; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.data.GenericAppenderFactory; import org.apache.iceberg.data.GenericRecord; @@ -43,37 +49,29 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.util.StructLikeSet; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestGenericSortedPosDeleteWriter extends TableTestBase { +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; + +public class TestGenericSortedPosDeleteWriter extends TestBase { private static final int FORMAT_V2 = 2; - private final FileFormat format; + @Parameter(index = 1) + private FileFormat format; private OutputFileFactory fileFactory; private Record gRecord; - @Parameterized.Parameters(name = "FileFormat={0}") - public static Object[] parameters() { - return new Object[][] {new Object[] {"avro"}, new Object[] {"orc"}, new Object[] {"parquet"}}; - } - - public TestGenericSortedPosDeleteWriter(String fileFormat) { - super(FORMAT_V2); - this.format = FileFormat.fromString(fileFormat); + @Parameters(name = "formatVersion = {0}, fileFormat = {1}") + public static List parameters() { + return Arrays.asList( + new Object[] {FORMAT_V2, FileFormat.AVRO}, + new Object[] {FORMAT_V2, FileFormat.ORC}, + new Object[] {FORMAT_V2, FileFormat.PARQUET}); } @Override - @Before - public void setupTable() throws IOException { - this.tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); - + @BeforeEach + public void setupTable() throws Exception { this.metadataDir = new File(tableDir, "metadata"); this.table = create(SCHEMA, PartitionSpec.unpartitioned()); this.gRecord = GenericRecord.create(SCHEMA); @@ -121,7 +119,7 @@ private StructLikeSet actualRowSet(String... columns) throws IOException { return set; } - @Test + @TestTemplate public void testSortedPosDelete() throws IOException { List rowSet = Lists.newArrayList( @@ -144,7 +142,7 @@ public void testSortedPosDelete() throws IOException { } List deleteFiles = writer.complete(); - Assert.assertEquals(1, deleteFiles.size()); + assertThat(deleteFiles).hasSize(1); DeleteFile deleteFile = deleteFiles.get(0); // Check whether the path-pos pairs are sorted as expected. 
@@ -155,7 +153,7 @@ public void testSortedPosDelete() throws IOException { record.copy("file_path", dataFile.path(), "pos", 0L), record.copy("file_path", dataFile.path(), "pos", 2L), record.copy("file_path", dataFile.path(), "pos", 4L)); - Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path())); + assertThat(readRecordsAsList(pathPosSchema, deleteFile.path())).isEqualTo(expectedDeletes); table .newRowDelta() @@ -166,11 +164,10 @@ public void testSortedPosDelete() throws IOException { .commit(); List expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd")); - Assert.assertEquals( - "Should have the expected records", expectedRowSet(expectedData), actualRowSet("*")); + assertThat(actualRowSet("*")).isEqualTo(expectedRowSet(expectedData)); } - @Test + @TestTemplate public void testSortedPosDeleteWithSchemaAndNullRow() throws IOException { List rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc")); @@ -180,19 +177,15 @@ public void testSortedPosDeleteWithSchemaAndNullRow() throws IOException { new GenericAppenderFactory(table.schema(), table.spec(), null, null, table.schema()); DataFile dataFile = prepareDataFile(appenderFactory, rowSet); - SortedPosDeleteWriter writer = - new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 1); - boolean caughtError = false; - try { - writer.delete(dataFile.path(), 0L); - } catch (Exception e) { - caughtError = true; - } - Assert.assertTrue( - "Should fail because the appender are required non-null rows to write", caughtError); + // no check on the underlying error msg as it might be missing based on the JDK version + assertThatThrownBy( + () -> + new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 1) + .delete(dataFile.path(), 0L)) + .isInstanceOf(Exception.class); } - @Test + @TestTemplate public void testSortedPosDeleteWithRow() throws IOException { List rowSet = Lists.newArrayList( @@ -216,7 +209,7 @@ public void testSortedPosDeleteWithRow() throws IOException { } List deleteFiles = writer.complete(); - Assert.assertEquals(1, deleteFiles.size()); + assertThat(deleteFiles).hasSize(1); DeleteFile deleteFile = deleteFiles.get(0); // Check whether the path-pos pairs are sorted as expected. 
@@ -227,7 +220,7 @@ public void testSortedPosDeleteWithRow() throws IOException { record.copy("file_path", dataFile.path(), "pos", 0L, "row", createRow(0, "aaa")), record.copy("file_path", dataFile.path(), "pos", 2L, "row", createRow(2, "ccc")), record.copy("file_path", dataFile.path(), "pos", 4L, "row", createRow(4, "eee"))); - Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path())); + assertThat(readRecordsAsList(pathPosSchema, deleteFile.path())).isEqualTo(expectedDeletes); table .newRowDelta() @@ -238,11 +231,10 @@ public void testSortedPosDeleteWithRow() throws IOException { .commit(); List expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd")); - Assert.assertEquals( - "Should have the expected records", expectedRowSet(expectedData), actualRowSet("*")); + assertThat(actualRowSet("*")).isEqualTo(expectedRowSet(expectedData)); } - @Test + @TestTemplate public void testMultipleFlush() throws IOException { FileAppenderFactory appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, null); @@ -281,7 +273,7 @@ public void testMultipleFlush() throws IOException { } List deleteFiles = writer.complete(); - Assert.assertEquals(10, deleteFiles.size()); + assertThat(deleteFiles).hasSize(10); Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema(); Record record = GenericRecord.create(pathPosSchema); @@ -295,15 +287,14 @@ public void testMultipleFlush() throws IOException { } DeleteFile deleteFile = deleteFiles.get(deleteFileIndex); - Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path())); + assertThat(readRecordsAsList(pathPosSchema, deleteFile.path())).isEqualTo(expectedDeletes); } rowDelta = table.newRowDelta(); deleteFiles.forEach(rowDelta::addDeletes); rowDelta.commit(); - Assert.assertEquals( - "Should have no record.", expectedRowSet(ImmutableList.of()), actualRowSet("*")); + assertThat(actualRowSet("*")).isEqualTo(expectedRowSet(ImmutableList.of())); } private List readRecordsAsList(Schema schema, CharSequence path) throws IOException {