From 9e9b62d35125125b78384ede4515a62a8ca0ef6d Mon Sep 17 00:00:00 2001 From: Chinmay Bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 22 Dec 2023 21:38:40 +0530 Subject: [PATCH] Spark 3:5 Migrate tests to JUnit5 in source directory (#9342) --- .../org/apache/iceberg/RecordWrapperTest.java | 2 +- .../apache/iceberg/spark/CatalogTestBase.java | 43 ++- .../iceberg/spark/source/TestBaseReader.java | 88 +++-- .../spark/source/TestChangelogReader.java | 32 +- .../spark/source/TestDataFrameWriterV2.java | 30 +- .../spark/source/TestDataSourceOptions.java | 92 ++--- .../source/TestForwardCompatibility.java | 34 +- .../source/TestIcebergSourceHadoopTables.java | 6 +- .../source/TestIcebergSourceHiveTables.java | 8 +- .../source/TestIcebergSourceTablesBase.java | 273 +++++++------ .../spark/source/TestIcebergSpark.java | 132 +++---- .../spark/source/TestInternalRowWrapper.java | 17 +- .../TestMetadataTableReadableMetrics.java | 22 +- .../spark/source/TestPathIdentifier.java | 36 +- .../spark/source/TestReadProjection.java | 361 ++++++++++-------- .../spark/source/TestSparkAggregates.java | 24 +- .../TestSparkCatalogCacheExpiration.java | 34 +- .../spark/source/TestSparkDataFile.java | 22 +- .../spark/source/TestSparkReadMetrics.java | 108 +++--- .../spark/source/TestSparkStagedScan.java | 30 +- .../iceberg/spark/source/TestSparkTable.java | 23 +- .../spark/source/TestStreamingOffset.java | 14 +- .../spark/source/TestStructuredStreaming.java | 58 +-- .../spark/source/TestWriteMetricsConfig.java | 100 ++--- 24 files changed, 833 insertions(+), 756 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/RecordWrapperTest.java b/data/src/test/java/org/apache/iceberg/RecordWrapperTest.java index 1084958f528b..22b928d23883 100644 --- a/data/src/test/java/org/apache/iceberg/RecordWrapperTest.java +++ b/data/src/test/java/org/apache/iceberg/RecordWrapperTest.java @@ -24,7 +24,7 @@ import org.apache.iceberg.types.Types; import 
org.apache.iceberg.util.StructLikeWrapper; import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public abstract class RecordWrapperTest { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/CatalogTestBase.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/CatalogTestBase.java index dbb839eacc48..61f100ca5c09 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/CatalogTestBase.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/CatalogTestBase.java @@ -18,29 +18,40 @@ */ package org.apache.iceberg.spark; +import java.nio.file.Path; import java.util.Map; -import java.util.stream.Stream; -import org.junit.jupiter.params.provider.Arguments; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +@ExtendWith(ParameterizedTestExtension.class) public abstract class CatalogTestBase extends TestBaseWithCatalog { // these parameters are broken out to avoid changes that need to modify lots of test suites - public static Stream parameters() { - return Stream.of( - Arguments.of( - SparkCatalogConfig.HIVE.catalogName(), - SparkCatalogConfig.HIVE.implementation(), - SparkCatalogConfig.HIVE.properties()), - Arguments.of( - SparkCatalogConfig.HADOOP.catalogName(), - SparkCatalogConfig.HADOOP.implementation(), - SparkCatalogConfig.HADOOP.properties()), - Arguments.of( - SparkCatalogConfig.SPARK.catalogName(), - SparkCatalogConfig.SPARK.implementation(), - SparkCatalogConfig.SPARK.properties())); + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + public static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HIVE.catalogName(), + SparkCatalogConfig.HIVE.implementation(), + SparkCatalogConfig.HIVE.properties() + }, + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), 
+ SparkCatalogConfig.HADOOP.properties() + }, + { + SparkCatalogConfig.SPARK.catalogName(), + SparkCatalogConfig.SPARK.implementation(), + SparkCatalogConfig.SPARK.properties() + } + }; } + @TempDir protected Path temp; + public CatalogTestBase(SparkCatalogConfig config) { super(config); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestBaseReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestBaseReader.java index 3d94966eb76c..27e7d7c496ef 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestBaseReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestBaseReader.java @@ -20,9 +20,11 @@ import static org.apache.iceberg.FileFormat.PARQUET; import static org.apache.iceberg.Files.localOutput; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -47,14 +49,12 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.spark.data.RandomData; import org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestBaseReader { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private Table table; @@ -129,15 +129,17 @@ public void testClosureOnDataExhaustion() throws IOException { int countRecords = 0; while (reader.next()) { countRecords += 1; - Assert.assertNotNull("Reader should return non-null value", reader.get()); + assertThat(reader.get()).as("Reader should return non-null value").isNotNull(); } - Assert.assertEquals( - "Reader returned incorrect number of records", totalTasks * recordPerTask, countRecords); + assertThat(totalTasks * recordPerTask) 
+ .as("Reader returned incorrect number of records") + .isEqualTo(countRecords); tasks.forEach( t -> - Assert.assertTrue( - "All iterators should be closed after read exhausion", reader.isIteratorClosed(t))); + assertThat(reader.isIteratorClosed(t)) + .as("All iterators should be closed after read exhausion") + .isTrue()); } @Test @@ -145,28 +147,29 @@ public void testClosureDuringIteration() throws IOException { Integer totalTasks = 2; Integer recordPerTask = 1; List tasks = createFileScanTasks(totalTasks, recordPerTask); - Assert.assertEquals(2, tasks.size()); + assertThat(tasks).hasSize(2); FileScanTask firstTask = tasks.get(0); FileScanTask secondTask = tasks.get(1); ClosureTrackingReader reader = new ClosureTrackingReader(table, tasks); // Total of 2 elements - Assert.assertTrue(reader.next()); - Assert.assertFalse( - "First iter should not be closed on its last element", reader.isIteratorClosed(firstTask)); - - Assert.assertTrue(reader.next()); - Assert.assertTrue( - "First iter should be closed after moving to second iter", - reader.isIteratorClosed(firstTask)); - Assert.assertFalse( - "Second iter should not be closed on its last element", - reader.isIteratorClosed(secondTask)); - - Assert.assertFalse(reader.next()); - Assert.assertTrue(reader.isIteratorClosed(firstTask)); - Assert.assertTrue(reader.isIteratorClosed(secondTask)); + assertThat(reader.next()).isTrue(); + assertThat(reader.isIteratorClosed(firstTask)) + .as("First iter should not be closed on its last element") + .isFalse(); + + assertThat(reader.next()).isTrue(); + assertThat(reader.isIteratorClosed(firstTask)) + .as("First iter should be closed after moving to second iter") + .isTrue(); + assertThat(reader.isIteratorClosed(secondTask)) + .as("Second iter should not be closed on its last element") + .isFalse(); + + assertThat(reader.next()).isFalse(); + assertThat(reader.isIteratorClosed(firstTask)).isTrue(); + assertThat(reader.isIteratorClosed(secondTask)).isTrue(); } @Test @@ -181,8 +184,9 
@@ public void testClosureWithoutAnyRead() throws IOException { tasks.forEach( t -> - Assert.assertFalse( - "Iterator should not be created eagerly for tasks", reader.hasIterator(t))); + assertThat(reader.hasIterator(t)) + .as("Iterator should not be created eagerly for tasks") + .isFalse()); } @Test @@ -195,8 +199,8 @@ public void testExplicitClosure() throws IOException { Integer halfDataSize = (totalTasks * recordPerTask) / 2; for (int i = 0; i < halfDataSize; i++) { - Assert.assertTrue("Reader should have some element", reader.next()); - Assert.assertNotNull("Reader should return non-null value", reader.get()); + assertThat(reader.next()).as("Reader should have some element").isTrue(); + assertThat(reader.get()).as("Reader should return non-null value").isNotNull(); } reader.close(); @@ -206,8 +210,9 @@ public void testExplicitClosure() throws IOException { tasks.forEach( t -> { if (reader.hasIterator(t)) { - Assert.assertTrue( - "Iterator should be closed after read exhausion", reader.isIteratorClosed(t)); + assertThat(reader.isIteratorClosed(t)) + .as("Iterator should be closed after read exhausion") + .isTrue(); } }); } @@ -222,20 +227,21 @@ public void testIdempotentExplicitClosure() throws IOException { // Total 100 elements, only 5 iterators have been created for (int i = 0; i < 45; i++) { - Assert.assertTrue("eader should have some element", reader.next()); - Assert.assertNotNull("Reader should return non-null value", reader.get()); + assertThat(reader.next()).as("Reader should have some element").isTrue(); + assertThat(reader.get()).as("Reader should return non-null value").isNotNull(); } for (int closeAttempt = 0; closeAttempt < 5; closeAttempt++) { reader.close(); for (int i = 0; i < 5; i++) { - Assert.assertTrue( - "Iterator should be closed after read exhausion", - reader.isIteratorClosed(tasks.get(i))); + assertThat(reader.isIteratorClosed(tasks.get(i))) + .as("Iterator should be closed after read exhausion") + .isTrue(); } for (int i = 5; i < 10; 
i++) { - Assert.assertFalse( - "Iterator should not be created eagerly for tasks", reader.hasIterator(tasks.get(i))); + assertThat(reader.hasIterator(tasks.get(i))) + .as("Iterator should not be created eagerly for tasks") + .isFalse(); } } } @@ -243,10 +249,10 @@ public void testIdempotentExplicitClosure() throws IOException { private List createFileScanTasks(Integer totalTasks, Integer recordPerTask) throws IOException { String desc = "make_scan_tasks"; - File parent = temp.newFolder(desc); + File parent = temp.resolve(desc).toFile(); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); - Assert.assertTrue("mkdirs should succeed", dataFolder.mkdirs()); + assertThat(dataFolder.mkdirs()).as("mkdirs should succeed").isTrue(); Schema schema = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestChangelogReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestChangelogReader.java index fc17547fad41..52d6ff8c9c8b 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestChangelogReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestChangelogReader.java @@ -20,8 +20,11 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import java.util.stream.Collectors; import org.apache.iceberg.ChangelogOperation; @@ -41,17 +44,15 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.spark.SparkTestBase; +import org.apache.iceberg.spark.TestBase; import 
org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class TestChangelogReader extends SparkTestBase { +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class TestChangelogReader extends TestBase { private static final Schema SCHEMA = new Schema( required(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); @@ -64,9 +65,9 @@ public class TestChangelogReader extends SparkTestBase { private DataFile dataFile1; private DataFile dataFile2; - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; - @Before + @BeforeEach public void before() throws IOException { table = catalog.createTable(TableIdentifier.of("default", "test"), SCHEMA, SPEC); // create some data @@ -85,7 +86,7 @@ public void before() throws IOException { dataFile2 = writeDataFile(records2); } - @After + @AfterEach public void after() { catalog.dropTable(TableIdentifier.of("default", "test")); } @@ -176,7 +177,7 @@ public void testDataFileRewrite() throws IOException { reader.close(); } - Assert.assertEquals("Should have no rows", 0, rows.size()); + assertThat(rows).as("Should have no rows").hasSize(0); } @Test @@ -254,6 +255,9 @@ private Object[] toJava(InternalRow row) { private DataFile writeDataFile(List records) throws IOException { // records all use IDs that are in bucket id_bucket=0 return FileHelpers.writeDataFile( - table, Files.localOutput(temp.newFile()), TestHelpers.Row.of(0), records); + table, + Files.localOutput(File.createTempFile("junit", null, temp.toFile())), + TestHelpers.Row.of(0), + records); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java index 6fbad46f96db..4959cd2a9e06 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java @@ -18,11 +18,14 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.util.List; import org.apache.iceberg.TableProperties; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.spark.Spark3Util; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; +import org.apache.iceberg.spark.TestBaseWithCatalog; import org.apache.iceberg.types.Types; import org.apache.spark.sql.AnalysisException; import org.apache.spark.sql.Dataset; @@ -32,19 +35,17 @@ import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.catalyst.parser.ParseException; import org.apache.spark.sql.internal.SQLConf; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -public class TestDataFrameWriterV2 extends SparkTestBaseWithCatalog { - @Before +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestDataFrameWriterV2 extends TestBaseWithCatalog { + @BeforeEach public void createTable() { sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName); } - @After + @AfterEach public void removeTables() { sql("DROP TABLE IF EXISTS %s", tableName); } @@ -76,7 +77,7 @@ public void testMergeSchemaFailsWithoutWriterOption() throws Exception { // this has a different error message than the case without accept-any-schema because it uses // Iceberg checks - Assertions.assertThatThrownBy(() -> 
threeColDF.writeTo(tableName).append()) + assertThatThrownBy(() -> threeColDF.writeTo(tableName).append()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Field new_col not found in source schema"); } @@ -102,8 +103,7 @@ public void testMergeSchemaWithoutAcceptAnySchema() throws Exception { "{ \"id\": 3, \"data\": \"c\", \"new_col\": 12.06 }", "{ \"id\": 4, \"data\": \"d\", \"new_col\": 14.41 }"); - Assertions.assertThatThrownBy( - () -> threeColDF.writeTo(tableName).option("merge-schema", "true").append()) + assertThatThrownBy(() -> threeColDF.writeTo(tableName).option("merge-schema", "true").append()) .isInstanceOf(AnalysisException.class) .hasMessageContaining( "Cannot write to `testhadoop`.`default`.`table`, the reason is too many data columns"); @@ -201,12 +201,12 @@ public void testWriteWithCaseSensitiveOption() throws NoSuchTableException, Pars List fields = Spark3Util.loadIcebergTable(sparkSession, tableName).schema().asStruct().fields(); // Additional columns should not be created - Assert.assertEquals(2, fields.size()); + assertThat(fields).hasSize(2); // enable spark.sql.caseSensitive sparkSession.sql(String.format("SET %s=true", SQLConf.CASE_SENSITIVE().key())); ds.writeTo(tableName).option("merge-schema", "true").option("check-ordering", "false").append(); fields = Spark3Util.loadIcebergTable(sparkSession, tableName).schema().asStruct().fields(); - Assert.assertEquals(4, fields.size()); + assertThat(fields).hasSize(4); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataSourceOptions.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataSourceOptions.java index 83d8953735c5..00c9083e25fc 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataSourceOptions.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataSourceOptions.java @@ -19,9 +19,12 @@ package org.apache.iceberg.spark.source; import static 
org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.math.RoundingMode; +import java.nio.file.Path; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -43,8 +46,8 @@ import org.apache.iceberg.relocated.com.google.common.math.LongMath; import org.apache.iceberg.spark.CommitMetadata; import org.apache.iceberg.spark.SparkReadOptions; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.spark.SparkWriteOptions; +import org.apache.iceberg.spark.TestBaseWithCatalog; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SnapshotUtil; import org.apache.spark.sql.Column; @@ -54,15 +57,12 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -public class TestDataSourceOptions extends SparkTestBaseWithCatalog { +public class TestDataSourceOptions extends TestBaseWithCatalog { private static final Configuration CONF = new Configuration(); private static final Schema SCHEMA = @@ -70,14 +70,14 @@ public class TestDataSourceOptions extends SparkTestBaseWithCatalog { optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); private static SparkSession spark = null; - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; - @BeforeClass + @BeforeAll public static void startSpark() { 
TestDataSourceOptions.spark = SparkSession.builder().master("local[2]").getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestDataSourceOptions.spark; TestDataSourceOptions.spark = null; @@ -86,7 +86,7 @@ public static void stopSpark() { @Test public void testWriteFormatOptionOverridesTableProperties() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -109,14 +109,14 @@ public void testWriteFormatOptionOverridesTableProperties() throws IOException { tasks.forEach( task -> { FileFormat fileFormat = FileFormat.fromFileName(task.file().path()); - Assert.assertEquals(FileFormat.PARQUET, fileFormat); + assertThat(fileFormat).isEqualTo(FileFormat.PARQUET); }); } } @Test public void testNoWriteFormatOption() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -134,14 +134,14 @@ public void testNoWriteFormatOption() throws IOException { tasks.forEach( task -> { FileFormat fileFormat = FileFormat.fromFileName(task.file().path()); - Assert.assertEquals(FileFormat.AVRO, fileFormat); + assertThat(fileFormat).isEqualTo(FileFormat.AVRO); }); } } @Test public void testHadoopOptions() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); Configuration sparkHadoopConf = spark.sessionState().newHadoopConf(); String originalDefaultFS = sparkHadoopConf.get("fs.default.name"); @@ -175,7 +175,7 @@ public void testHadoopOptions() throws IOException { List resultRecords = 
resultDf.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Records should match", expectedRecords, resultRecords); + assertThat(resultRecords).as("Records should match").isEqualTo(expectedRecords); } finally { sparkHadoopConf.set("fs.default.name", originalDefaultFS); } @@ -183,7 +183,7 @@ public void testHadoopOptions() throws IOException { @Test public void testSplitOptionsOverridesTableProperties() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -207,7 +207,7 @@ public void testSplitOptionsOverridesTableProperties() throws IOException { List files = Lists.newArrayList(icebergTable.currentSnapshot().addedDataFiles(icebergTable.io())); - Assert.assertEquals("Should have written 1 file", 1, files.size()); + assertThat(files).as("Should have written 1 file").hasSize(1); long fileSize = files.get(0).fileSizeInBytes(); long splitSize = LongMath.divide(fileSize, 2, RoundingMode.CEILING); @@ -219,12 +219,14 @@ public void testSplitOptionsOverridesTableProperties() throws IOException { .option(SparkReadOptions.SPLIT_SIZE, String.valueOf(splitSize)) .load(tableLocation); - Assert.assertEquals("Spark partitions should match", 2, resultDf.javaRDD().getNumPartitions()); + assertThat(resultDf.javaRDD().getNumPartitions()) + .as("Spark partitions should match") + .isEqualTo(2); } @Test public void testIncrementalScanOptions() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -245,7 +247,7 @@ public void testIncrementalScanOptions() throws IOException { List snapshotIds = 
SnapshotUtil.currentAncestorIds(table); // start-snapshot-id and snapshot-id are both configured. - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> spark .read() @@ -259,7 +261,7 @@ public void testIncrementalScanOptions() throws IOException { "Cannot set start-snapshot-id and end-snapshot-id for incremental scans when either snapshot-id or as-of-timestamp is set"); // end-snapshot-id and as-of-timestamp are both configured. - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> spark .read() @@ -275,7 +277,7 @@ public void testIncrementalScanOptions() throws IOException { "Cannot set start-snapshot-id and end-snapshot-id for incremental scans when either snapshot-id or as-of-timestamp is set"); // only end-snapshot-id is configured. - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> spark .read() @@ -297,7 +299,7 @@ public void testIncrementalScanOptions() throws IOException { .orderBy("id") .as(Encoders.bean(SimpleRecord.class)) .collectAsList(); - Assert.assertEquals("Records should match", expectedRecords.subList(1, 4), result); + assertThat(result).as("Records should match").isEqualTo(expectedRecords.subList(1, 4)); // test (2nd snapshot, 3rd snapshot] incremental scan. 
Dataset resultDf = @@ -309,13 +311,13 @@ public void testIncrementalScanOptions() throws IOException { .load(tableLocation); List result1 = resultDf.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Records should match", expectedRecords.subList(2, 3), result1); - Assert.assertEquals("Unprocessed count should match record count", 1, resultDf.count()); + assertThat(result1).as("Records should match").isEqualTo(expectedRecords.subList(2, 3)); + assertThat(resultDf.count()).as("Unprocessed count should match record count").isEqualTo(1); } @Test public void testMetadataSplitSizeOptionOverrideTableProperties() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -332,7 +334,7 @@ public void testMetadataSplitSizeOptionOverrideTableProperties() throws IOExcept List manifests = table.currentSnapshot().allManifests(table.io()); - Assert.assertEquals("Must be 2 manifests", 2, manifests.size()); + assertThat(manifests).as("Must be 2 manifests").hasSize(2); // set the target metadata split size so each manifest ends up in a separate split table @@ -341,7 +343,7 @@ public void testMetadataSplitSizeOptionOverrideTableProperties() throws IOExcept .commit(); Dataset entriesDf = spark.read().format("iceberg").load(tableLocation + "#entries"); - Assert.assertEquals("Num partitions must match", 2, entriesDf.javaRDD().getNumPartitions()); + assertThat(entriesDf.javaRDD().getNumPartitions()).as("Num partitions must match").isEqualTo(2); // override the table property using options entriesDf = @@ -350,12 +352,12 @@ public void testMetadataSplitSizeOptionOverrideTableProperties() throws IOExcept .format("iceberg") .option(SparkReadOptions.SPLIT_SIZE, String.valueOf(128 * 1024 * 1024)) .load(tableLocation + "#entries"); - 
Assert.assertEquals("Num partitions must match", 1, entriesDf.javaRDD().getNumPartitions()); + assertThat(entriesDf.javaRDD().getNumPartitions()).as("Num partitions must match").isEqualTo(1); } @Test public void testDefaultMetadataSplitSize() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -384,12 +386,12 @@ public void testDefaultMetadataSplitSize() throws IOException { Dataset metadataDf = spark.read().format("iceberg").load(tableLocation + "#entries"); int partitionNum = metadataDf.javaRDD().getNumPartitions(); - Assert.assertEquals("Spark partitions should match", expectedSplits, partitionNum); + assertThat(partitionNum).as("Spark partitions should match").isEqualTo(expectedSplits); } @Test public void testExtraSnapshotMetadata() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); tables.create(SCHEMA, PartitionSpec.unpartitioned(), Maps.newHashMap(), tableLocation); @@ -407,13 +409,14 @@ public void testExtraSnapshotMetadata() throws IOException { Table table = tables.load(tableLocation); - Assert.assertTrue(table.currentSnapshot().summary().get("extra-key").equals("someValue")); - Assert.assertTrue(table.currentSnapshot().summary().get("another-key").equals("anotherValue")); + assertThat(table.currentSnapshot().summary()) + .containsEntry("extra-key", "someValue") + .containsEntry("another-key", "anotherValue"); } @Test public void testExtraSnapshotMetadataWithSQL() throws InterruptedException, IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new 
HadoopTables(CONF); Table table = @@ -448,9 +451,9 @@ public void testExtraSnapshotMetadataWithSQL() throws InterruptedException, IOEx writerThread.join(); List snapshots = Lists.newArrayList(table.snapshots()); - Assert.assertEquals(2, snapshots.size()); - Assert.assertNull(snapshots.get(0).summary().get("writer-thread")); - Assertions.assertThat(snapshots.get(1).summary()) + assertThat(snapshots).hasSize(2); + assertThat(snapshots.get(0).summary().get("writer-thread")).isNull(); + assertThat(snapshots.get(1).summary()) .containsEntry("writer-thread", "test-extra-commit-message-writer-thread") .containsEntry("extra-key", "someValue") .containsEntry("another-key", "anotherValue"); @@ -491,9 +494,10 @@ public void testExtraSnapshotMetadataWithDelete() Table table = validationCatalog.loadTable(tableIdent); List snapshots = Lists.newArrayList(table.snapshots()); - Assert.assertEquals(2, snapshots.size()); - Assert.assertNull(snapshots.get(0).summary().get("writer-thread")); - Assertions.assertThat(snapshots.get(1).summary()) + + assertThat(snapshots).hasSize(2); + assertThat(snapshots.get(0).summary().get("writer-thread")).isNull(); + assertThat(snapshots.get(1).summary()) .containsEntry("writer-thread", "test-extra-commit-message-delete-thread") .containsEntry("extra-key", "someValue") .containsEntry("another-key", "anotherValue"); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestForwardCompatibility.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestForwardCompatibility.java index 446989d1af3e..84c99a575c8d 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestForwardCompatibility.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestForwardCompatibility.java @@ -20,9 +20,12 @@ import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.Files.localOutput; +import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import java.util.UUID; import java.util.concurrent.TimeoutException; @@ -54,13 +57,10 @@ import org.apache.spark.sql.execution.streaming.MemoryStream; import org.apache.spark.sql.streaming.StreamingQuery; import org.apache.spark.sql.streaming.StreamingQueryException; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import scala.Option; import scala.collection.JavaConverters; @@ -87,16 +87,16 @@ public class TestForwardCompatibility { .addField("identity", 1, "id_zero") .build(); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static SparkSession spark = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestForwardCompatibility.spark = SparkSession.builder().master("local[2]").getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestForwardCompatibility.spark; TestForwardCompatibility.spark = null; @@ -105,7 +105,7 @@ public static void stopSpark() { @Test public void testSparkWriteFailsUnknownTransform() throws IOException { - File parent = temp.newFolder("avro"); + File parent = temp.resolve("avro").toFile(); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); @@ -119,7 +119,7 @@ public void testSparkWriteFailsUnknownTransform() throws IOException { Dataset df = spark.createDataFrame(expected, SimpleRecord.class); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> df.select("id", "data") .write() @@ -132,7 
+132,7 @@ public void testSparkWriteFailsUnknownTransform() throws IOException { @Test public void testSparkStreamingWriteFailsUnknownTransform() throws IOException, TimeoutException { - File parent = temp.newFolder("avro"); + File parent = temp.resolve("avro").toFile(); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); @@ -157,14 +157,14 @@ public void testSparkStreamingWriteFailsUnknownTransform() throws IOException, T List batch1 = Lists.newArrayList(1, 2); send(batch1, inputStream); - Assertions.assertThatThrownBy(query::processAllAvailable) + assertThatThrownBy(query::processAllAvailable) .isInstanceOf(StreamingQueryException.class) .hasMessageEndingWith("Cannot write using unsupported transforms: zero"); } @Test public void testSparkCanReadUnknownTransform() throws IOException { - File parent = temp.newFolder("avro"); + File parent = temp.resolve("avro").toFile(); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); @@ -194,7 +194,7 @@ public void testSparkCanReadUnknownTransform() throws IOException { .withPartitionPath("id_zero=0") .build(); - OutputFile manifestFile = localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + OutputFile manifestFile = localOutput(FileFormat.AVRO.addExtension(temp.toFile().toString())); ManifestWriter manifestWriter = ManifestFiles.write(FAKE_SPEC, manifestFile); try { manifestWriter.add(file); @@ -207,7 +207,7 @@ public void testSparkCanReadUnknownTransform() throws IOException { Dataset df = spark.read().format("iceberg").load(location.toString()); List rows = df.collectAsList(); - Assert.assertEquals("Should contain 100 rows", 100, rows.size()); + assertThat(rows).as("Should contain 100 rows").hasSize(100); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe(table.schema().asStruct(), expected.get(i), rows.get(i)); diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHadoopTables.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHadoopTables.java index 746415818c84..ff0b76ed0e3f 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHadoopTables.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHadoopTables.java @@ -26,7 +26,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopTables; -import org.junit.Before; +import org.junit.jupiter.api.BeforeEach; public class TestIcebergSourceHadoopTables extends TestIcebergSourceTablesBase { @@ -35,9 +35,9 @@ public class TestIcebergSourceHadoopTables extends TestIcebergSourceTablesBase { File tableDir = null; String tableLocation = null; - @Before + @BeforeEach public void setupTable() throws Exception { - this.tableDir = temp.newFolder(); + this.tableDir = temp.toFile(); tableDir.delete(); // created by table create this.tableLocation = tableDir.toURI().toString(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHiveTables.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHiveTables.java index 2a264b74b0e2..9120bbcc35a3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHiveTables.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceHiveTables.java @@ -27,14 +27,14 @@ import org.apache.iceberg.Table; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; -import org.junit.After; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; public class TestIcebergSourceHiveTables extends TestIcebergSourceTablesBase { private static TableIdentifier currentIdentifier; - 
@BeforeClass + @BeforeAll public static void start() { Namespace db = Namespace.of("db"); if (!catalog.namespaceExists(db)) { @@ -42,7 +42,7 @@ public static void start() { } } - @After + @AfterEach public void dropTable() throws IOException { if (!catalog.tableExists(currentIdentifier)) { return; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java index 486713e52e30..29ccba5a27c7 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java @@ -22,10 +22,13 @@ import static org.apache.iceberg.ManifestContent.DELETES; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.file.Path; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.Arrays; @@ -74,8 +77,8 @@ import org.apache.iceberg.spark.SparkSQLProperties; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkTableUtil; -import org.apache.iceberg.spark.SparkTestBase; import org.apache.iceberg.spark.SparkWriteOptions; +import org.apache.iceberg.spark.TestBase; import org.apache.iceberg.spark.actions.SparkActions; import org.apache.iceberg.spark.data.TestHelpers; import org.apache.iceberg.types.Types; @@ -91,14 +94,11 @@ import org.apache.spark.sql.functions; import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.StructType; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Assert; -import 
org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -public abstract class TestIcebergSourceTablesBase extends SparkTestBase { +public abstract class TestIcebergSourceTablesBase extends TestBase { private static final Schema SCHEMA = new Schema( @@ -117,7 +117,7 @@ public abstract class TestIcebergSourceTablesBase extends SparkTestBase { private static final PartitionSpec SPEC = PartitionSpec.builderFor(SCHEMA).identity("id").build(); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir protected Path temp; public abstract Table createTable( TableIdentifier ident, Schema schema, PartitionSpec spec, Map properties); @@ -130,7 +130,7 @@ public abstract Table createTable( public abstract void dropTable(TableIdentifier ident) throws IOException; - @After + @AfterEach public void removeTable() { spark.sql("DROP TABLE IF EXISTS parquet_table"); } @@ -160,7 +160,7 @@ public synchronized void testTablesSupport() { List actualRecords = resultDf.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Records should match", expectedRecords, actualRecords); + assertThat(actualRecords).as("Records should match").isEqualTo(expectedRecords); } @Test @@ -187,8 +187,7 @@ public void testEntriesTable() throws Exception { Snapshot snapshot = table.currentSnapshot(); - Assert.assertEquals( - "Should only contain one manifest", 1, snapshot.allManifests(table.io()).size()); + assertThat(snapshot.allManifests(table.io())).as("Should only contain one manifest").hasSize(1); InputFile manifest = table.io().newInputFile(snapshot.allManifests(table.io()).get(0).path()); List expected = Lists.newArrayList(); @@ -205,8 +204,8 @@ public void testEntriesTable() throws Exception { }); } - Assert.assertEquals("Entries table should have one row", 1, expected.size()); - Assert.assertEquals("Actual 
results should have one row", 1, actual.size()); + assertThat(expected).as("Entries table should have one row").hasSize(1); + assertThat(actual).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( TestHelpers.nonDerivedSchema(entriesTableDs), expected.get(0), actual.get(0)); } @@ -236,8 +235,8 @@ public void testEntriesTablePartitionedPrune() { .select("status") .collectAsList(); - Assert.assertEquals("Results should contain only one status", 1, actual.size()); - Assert.assertEquals("That status should be Added (1)", 1, actual.get(0).getInt(0)); + assertThat(actual).as("Results should contain only one status").hasSize(1); + assertThat(actual.get(0).getInt(0)).as("That status should be Added (1)").isEqualTo(1); } @Test @@ -408,8 +407,9 @@ public void testAllEntriesTable() throws Exception { expected.sort(Comparator.comparing(o -> (Long) o.get("snapshot_id"))); - Assert.assertEquals("Entries table should have 3 rows", 3, expected.size()); - Assert.assertEquals("Actual results should have 3 rows", 3, actual.size()); + assertThat(expected).as("Entries table should have 3 rows").hasSize(3); + assertThat(actual).as("Actual results should have 3 rows").hasSize(3); + for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe( TestHelpers.nonDerivedSchema(entriesTableDs), expected.get(i), actual.get(i)); @@ -434,16 +434,20 @@ public void testCountEntriesTable() { final int expectedEntryCount = 1; // count entries - Assert.assertEquals( - "Count should return " + expectedEntryCount, - expectedEntryCount, - spark.read().format("iceberg").load(loadLocation(tableIdentifier, "entries")).count()); + assertThat( + spark.read().format("iceberg").load(loadLocation(tableIdentifier, "entries")).count()) + .as("Count should return " + expectedEntryCount) + .isEqualTo(expectedEntryCount); // count all_entries - Assert.assertEquals( - "Count should return " + expectedEntryCount, - expectedEntryCount, - 
spark.read().format("iceberg").load(loadLocation(tableIdentifier, "all_entries")).count()); + assertThat( + spark + .read() + .format("iceberg") + .load(loadLocation(tableIdentifier, "all_entries")) + .count()) + .as("Count should return " + expectedEntryCount) + .isEqualTo(expectedEntryCount); } @Test @@ -492,8 +496,8 @@ public void testFilesTable() throws Exception { } } - Assert.assertEquals("Files table should have one row", 1, expected.size()); - Assert.assertEquals("Actual results should have one row", 1, actual.size()); + assertThat(expected).as("Files table should have one row").hasSize(1); + assertThat(actual).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( TestHelpers.nonDerivedSchema(filesTableDs), expected.get(0), actual.get(0)); @@ -510,7 +514,7 @@ public void testFilesTableWithSnapshotIdInheritance() throws Exception { String.format( "CREATE TABLE parquet_table (data string, id int) " + "USING parquet PARTITIONED BY (id) LOCATION '%s'", - temp.newFolder())); + temp.toFile())); List records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b")); @@ -548,8 +552,8 @@ public void testFilesTableWithSnapshotIdInheritance() throws Exception { } Types.StructType struct = TestHelpers.nonDerivedSchema(filesTableDs); - Assert.assertEquals("Files table should have one row", 2, expected.size()); - Assert.assertEquals("Actual results should have one row", 2, actual.size()); + assertThat(expected).as("Files table should have 2 rows").hasSize(2); + assertThat(actual).as("Actual results should have 2 rows").hasSize(2); TestHelpers.assertEqualsSafe(struct, expected.get(0), actual.get(0)); TestHelpers.assertEqualsSafe(struct, expected.get(1), actual.get(1)); } @@ -566,7 +570,7 @@ public void testV1EntriesTableWithSnapshotIdInheritance() throws Exception { String.format( "CREATE TABLE parquet_table (data string, id int) " + "USING parquet PARTITIONED BY (id) LOCATION '%s'", - temp.newFolder())); + temp.toFile())); List 
records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b")); @@ -593,11 +597,11 @@ public void testV1EntriesTableWithSnapshotIdInheritance() throws Exception { long snapshotId = table.currentSnapshot().snapshotId(); - Assert.assertEquals("Entries table should have 2 rows", 2, actual.size()); - Assert.assertEquals("Sequence number must match", 0, actual.get(0).getLong(0)); - Assert.assertEquals("Snapshot id must match", snapshotId, actual.get(0).getLong(1)); - Assert.assertEquals("Sequence number must match", 0, actual.get(1).getLong(0)); - Assert.assertEquals("Snapshot id must match", snapshotId, actual.get(1).getLong(1)); + assertThat(actual).as("Entries table should have 2 rows").hasSize(2); + assertThat(actual.get(0).getLong(0)).as("Sequence number must match").isEqualTo(0); + assertThat(actual.get(0).getLong(1)).as("Snapshot id must match").isEqualTo(snapshotId); + assertThat(actual.get(1).getLong(0)).as("Sequence number must match").isEqualTo(0); + assertThat(actual.get(1).getLong(1)).as("Snapshot id must match").isEqualTo(snapshotId); } @Test @@ -650,8 +654,8 @@ public void testFilesUnpartitionedTable() throws Exception { } } - Assert.assertEquals("Files table should have one row", 1, expected.size()); - Assert.assertEquals("Actual results should have one row", 1, actual.size()); + assertThat(expected).as("Files table should have one row").hasSize(1); + assertThat(actual).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( TestHelpers.nonDerivedSchema(filesTableDs), expected.get(0), actual.get(0)); } @@ -702,12 +706,11 @@ public void testAllMetadataTablesWithStagedCommits() { .load(loadLocation(tableIdentifier, "all_entries")) .collectAsList(); - Assert.assertTrue( - "Stage table should have some snapshots", table.snapshots().iterator().hasNext()); - Assert.assertNull("Stage table should have null currentSnapshot", table.currentSnapshot()); - Assert.assertEquals("Actual results should have two rows", 2, 
actualAllData.size()); - Assert.assertEquals("Actual results should have two rows", 2, actualAllManifests.size()); - Assert.assertEquals("Actual results should have two rows", 2, actualAllEntries.size()); + assertThat(table.snapshots().iterator()).as("Stage table should have some snapshots").hasNext(); + assertThat(table.currentSnapshot()).as("Stage table should have null currentSnapshot").isNull(); + assertThat(actualAllData).as("Actual results should have two rows").hasSize(2); + assertThat(actualAllManifests).as("Actual results should have two rows").hasSize(2); + assertThat(actualAllEntries).as("Actual results should have two rows").hasSize(2); } @Test @@ -765,8 +768,8 @@ public void testAllDataFilesTable() throws Exception { expected.sort(Comparator.comparing(o -> o.get("file_path").toString())); - Assert.assertEquals("Files table should have two rows", 2, expected.size()); - Assert.assertEquals("Actual results should have two rows", 2, actual.size()); + assertThat(expected).as("Files table should have two rows").hasSize(2); + assertThat(actual).as("Actual results should have two rows").hasSize(2); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe( TestHelpers.nonDerivedSchema(filesTableDs), expected.get(i), actual.get(i)); @@ -857,7 +860,7 @@ public void testHistoryTable() { .set("is_current_ancestor", true) .build()); - Assert.assertEquals("History table should have a row for each commit", 4, actual.size()); + assertThat(actual).as("History table should have a row for each commit").hasSize(4); TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(0), actual.get(0)); TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(1), actual.get(1)); TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(2), actual.get(2)); @@ -936,7 +939,7 @@ public void testSnapshotsTable() { "total-data-files", "0")) .build()); - Assert.assertEquals("Snapshots table should have a row for each 
snapshot", 2, actual.size()); + assertThat(actual).as("Snapshots table should have a row for each snapshot").hasSize(2); TestHelpers.assertEqualsSafe(snapTable.schema().asStruct(), expected.get(0), actual.get(0)); TestHelpers.assertEqualsSafe(snapTable.schema().asStruct(), expected.get(1), actual.get(1)); } @@ -1009,7 +1012,7 @@ public void testPrunedSnapshotsTable() { "total-data-files", "0")) .build()); - Assert.assertEquals("Snapshots table should have a row for each snapshot", 2, actual.size()); + assertThat(actual).as("Snapshots table should have a row for each snapshot").hasSize(2); TestHelpers.assertEqualsSafe(projectedSchema.asStruct(), expected.get(0), actual.get(0)); TestHelpers.assertEqualsSafe(projectedSchema.asStruct(), expected.get(1), actual.get(1)); } @@ -1094,7 +1097,7 @@ public void testManifestsTable() { .build())) .build()); - Assert.assertEquals("Manifests table should have two manifest rows", 2, actual.size()); + assertThat(actual).as("Manifests table should have two manifest rows").hasSize(2); TestHelpers.assertEqualsSafe(manifestTable.schema().asStruct(), expected.get(0), actual.get(0)); TestHelpers.assertEqualsSafe(manifestTable.schema().asStruct(), expected.get(1), actual.get(1)); } @@ -1117,7 +1120,7 @@ public void testPruneManifestsTable() { if (!spark.version().startsWith("2")) { // Spark 2 isn't able to actually push down nested struct projections so this will not break - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> spark .read() @@ -1175,7 +1178,7 @@ public void testPruneManifestsTable() { .build())) .build()); - Assert.assertEquals("Manifests table should have one manifest row", 1, actual.size()); + assertThat(actual).as("Manifests table should have one manifest row").hasSize(1); TestHelpers.assertEqualsSafe(projectedSchema.asStruct(), expected.get(0), actual.get(0)); } @@ -1227,7 +1230,7 @@ public void testAllManifestsTable() { .sorted(Comparator.comparing(o -> o.get("path").toString())) .collect(Collectors.toList()); 
- Assert.assertEquals("Manifests table should have 5 manifest rows", 5, actual.size()); + assertThat(actual).as("Manifests table should have 5 manifest rows").hasSize(5); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe( manifestTable.schema().asStruct(), expected.get(i), actual.get(i)); @@ -1290,10 +1293,9 @@ public void testUnpartitionedPartitionsTable() { Table partitionsTable = loadTable(tableIdentifier, "partitions"); - Assert.assertEquals( - "Schema should not have partition field", - expectedSchema, - partitionsTable.schema().asStruct()); + assertThat(expectedSchema) + .as("Schema should not have partition field") + .isEqualTo(partitionsTable.schema().asStruct()); GenericRecordBuilder builder = new GenericRecordBuilder(AvroSchemaUtil.convert(partitionsTable.schema(), "partitions")); @@ -1319,7 +1321,7 @@ public void testUnpartitionedPartitionsTable() { .load(loadLocation(tableIdentifier, "partitions")) .collectAsList(); - Assert.assertEquals("Unpartitioned partitions table should have one row", 1, actual.size()); + assertThat(actual).as("Unpartitioned partitions table should have one row").hasSize(1); TestHelpers.assertEqualsSafe(expectedSchema, expectedRow, actual.get(0)); } @@ -1400,8 +1402,8 @@ public void testPartitionsTable() { .set("last_updated_snapshot_id", secondCommitId) .build()); - Assert.assertEquals("Partitions table should have two rows", 2, expected.size()); - Assert.assertEquals("Actual results should have two rows", 2, actual.size()); + assertThat(expected).as("Partitions table should have two rows").hasSize(2); + assertThat(actual).as("Actual results should have two rows").hasSize(2); for (int i = 0; i < 2; i += 1) { TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(i), actual.get(i)); @@ -1417,7 +1419,7 @@ public void testPartitionsTable() { .orderBy("partition.id") .collectAsList(); - Assert.assertEquals("Actual results should have one row", 1, actualAfterFirstCommit.size()); + 
assertThat(actualAfterFirstCommit).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(0), actualAfterFirstCommit.get(0)); @@ -1429,7 +1431,8 @@ public void testPartitionsTable() { .load(loadLocation(tableIdentifier, "partitions")) .filter("partition.id < 2") .collectAsList(); - Assert.assertEquals("Actual results should have one row", 1, filtered.size()); + + assertThat(filtered).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(0), filtered.get(0)); @@ -1440,7 +1443,8 @@ public void testPartitionsTable() { .load(loadLocation(tableIdentifier, "partitions")) .filter("partition.id < 2 or record_count=1") .collectAsList(); - Assert.assertEquals("Actual results should have two row", 2, nonFiltered.size()); + + assertThat(nonFiltered).as("Actual results should have two rows").hasSize(2); for (int i = 0; i < 2; i += 1) { TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(i), actual.get(i)); @@ -1481,12 +1485,11 @@ public void testPartitionsTableLastUpdatedSnapshot() { // check if rewrite manifest does not override metadata about data file's creating snapshot RewriteManifests.Result rewriteManifestResult = SparkActions.get().rewriteManifests(table).execute(); - Assert.assertEquals( - "rewrite replaced 2 manifests", - 2, - Iterables.size(rewriteManifestResult.rewrittenManifests())); - Assert.assertEquals( - "rewrite added 1 manifests", 1, Iterables.size(rewriteManifestResult.addedManifests())); + assertThat(rewriteManifestResult.rewrittenManifests()) + .as("rewrite replaced 2 manifests") + .hasSize(2); + + assertThat(rewriteManifestResult.addedManifests()).as("rewrite added 1 manifests").hasSize(1); List actual = spark @@ -1538,8 +1541,8 @@ public void testPartitionsTableLastUpdatedSnapshot() { .set("last_updated_snapshot_id", secondCommitId) .build()); - 
Assert.assertEquals("Partitions table should have two rows", 2, expected.size()); - Assert.assertEquals("Actual results should have two rows", 2, actual.size()); + assertThat(expected).as("Partitions table should have two rows").hasSize(2); + assertThat(actual).as("Actual results should have two rows").hasSize(2); for (int i = 0; i < 2; i += 1) { TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(i), actual.get(i)); @@ -1553,7 +1556,7 @@ public void testPartitionsTableLastUpdatedSnapshot() { .load(loadLocation(tableIdentifier, "partitions")) .filter("partition.id < 2") .collectAsList(); - Assert.assertEquals("Actual results should have one row", 1, filtered.size()); + assertThat(filtered).as("Actual results should have one row").hasSize(1); TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), expected.get(0), filtered.get(0)); @@ -1584,8 +1587,7 @@ public void testPartitionsTableLastUpdatedSnapshot() { .format("iceberg") .load(loadLocation(tableIdentifier, "partitions")) .collectAsList(); - Assert.assertEquals( - "Actual results should have two row", 2, actualAfterSnapshotExpiration.size()); + assertThat(actualAfterSnapshotExpiration).as("Actual results should have two rows").hasSize(2); for (int i = 0; i < 2; i += 1) { TestHelpers.assertEqualsSafe( partitionsTable.schema().asStruct(), @@ -1634,7 +1636,7 @@ public void testPartitionsTableDeleteStats() { .load(loadLocation(tableIdentifier, "partitions")) .orderBy("partition.id") .collectAsList(); - Assert.assertEquals("Actual results should have two rows", 2, actual.size()); + assertThat(actual).as("Actual results should have two rows").hasSize(2); GenericRecordBuilder builder = new GenericRecordBuilder(AvroSchemaUtil.convert(partitionsTable.schema(), "partitions")); @@ -1693,7 +1695,7 @@ public void testPartitionsTableDeleteStats() { .load(loadLocation(tableIdentifier, "partitions")) .orderBy("partition.id") .collectAsList(); - Assert.assertEquals("Actual results should 
have two rows", 2, actual.size()); + assertThat(actual).as("Actual results should have two rows").hasSize(2); expected.remove(0); expected.add( 0, @@ -1735,8 +1737,9 @@ public synchronized void testSnapshotReadAfterAddColumn() { table.refresh(); Dataset resultDf = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf.orderBy("id").collectAsList()); + assertThat(originalRecords) + .as("Records should match") + .isEqualTo(resultDf.orderBy("id").collectAsList()); Snapshot snapshotBeforeAddColumn = table.currentSnapshot(); @@ -1765,8 +1768,9 @@ public synchronized void testSnapshotReadAfterAddColumn() { RowFactory.create(5, "xyz", "C")); Dataset resultDf2 = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", updatedRecords, resultDf2.orderBy("id").collectAsList()); + assertThat(updatedRecords) + .as("Records should match") + .isEqualTo(resultDf2.orderBy("id").collectAsList()); Dataset resultDf3 = spark @@ -1774,9 +1778,12 @@ public synchronized void testSnapshotReadAfterAddColumn() { .format("iceberg") .option(SparkReadOptions.SNAPSHOT_ID, snapshotBeforeAddColumn.snapshotId()) .load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf3.orderBy("id").collectAsList()); - Assert.assertEquals("Schemas should match", originalSparkSchema, resultDf3.schema()); + + assertThat(originalRecords) + .as("Records should match") + .isEqualTo(resultDf3.orderBy("id").collectAsList()); + + assertThat(resultDf3.schema()).as("Schemas should match").isEqualTo(originalSparkSchema); } @Test @@ -1802,8 +1809,10 @@ public synchronized void testSnapshotReadAfterDropColumn() { table.refresh(); Dataset resultDf = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf.orderBy("id").collectAsList()); + + 
assertThat(resultDf.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(originalRecords); long tsBeforeDropColumn = waitUntilAfter(System.currentTimeMillis()); table.updateSchema().deleteColumn("data").commit(); @@ -1831,8 +1840,9 @@ public synchronized void testSnapshotReadAfterDropColumn() { RowFactory.create(5, "C")); Dataset resultDf2 = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", updatedRecords, resultDf2.orderBy("id").collectAsList()); + assertThat(resultDf2.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(updatedRecords); Dataset resultDf3 = spark @@ -1840,9 +1850,12 @@ public synchronized void testSnapshotReadAfterDropColumn() { .format("iceberg") .option(SparkReadOptions.AS_OF_TIMESTAMP, tsBeforeDropColumn) .load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf3.orderBy("id").collectAsList()); - Assert.assertEquals("Schemas should match", originalSparkSchema, resultDf3.schema()); + + assertThat(resultDf3.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(originalRecords); + + assertThat(resultDf3.schema()).as("Schemas should match").isEqualTo(originalSparkSchema); // At tsAfterDropColumn, there has been a schema change, but no new snapshot, // so the snapshot as of tsAfterDropColumn is the same as that as of tsBeforeDropColumn. 
@@ -1852,9 +1865,12 @@ public synchronized void testSnapshotReadAfterDropColumn() { .format("iceberg") .option(SparkReadOptions.AS_OF_TIMESTAMP, tsAfterDropColumn) .load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf4.orderBy("id").collectAsList()); - Assert.assertEquals("Schemas should match", originalSparkSchema, resultDf4.schema()); + + assertThat(resultDf4.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(originalRecords); + + assertThat(resultDf4.schema()).as("Schemas should match").isEqualTo(originalSparkSchema); } @Test @@ -1878,8 +1894,10 @@ public synchronized void testSnapshotReadAfterAddAndDropColumn() { table.refresh(); Dataset resultDf = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf.orderBy("id").collectAsList()); + + assertThat(resultDf.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(originalRecords); Snapshot snapshotBeforeAddColumn = table.currentSnapshot(); @@ -1908,8 +1926,10 @@ public synchronized void testSnapshotReadAfterAddAndDropColumn() { RowFactory.create(5, "xyz", "C")); Dataset resultDf2 = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", updatedRecords, resultDf2.orderBy("id").collectAsList()); + + assertThat(resultDf2.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(updatedRecords); table.updateSchema().deleteColumn("data").commit(); @@ -1922,8 +1942,10 @@ public synchronized void testSnapshotReadAfterAddAndDropColumn() { RowFactory.create(5, "C")); Dataset resultDf3 = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", recordsAfterDropColumn, resultDf3.orderBy("id").collectAsList()); + + assertThat(resultDf3.orderBy("id").collectAsList()) + .as("Records should match") + 
.isEqualTo(recordsAfterDropColumn); Dataset resultDf4 = spark @@ -1931,9 +1953,12 @@ public synchronized void testSnapshotReadAfterAddAndDropColumn() { .format("iceberg") .option(SparkReadOptions.SNAPSHOT_ID, snapshotBeforeAddColumn.snapshotId()) .load(loadLocation(tableIdentifier)); - Assert.assertEquals( - "Records should match", originalRecords, resultDf4.orderBy("id").collectAsList()); - Assert.assertEquals("Schemas should match", originalSparkSchema, resultDf4.schema()); + + assertThat(resultDf4.orderBy("id").collectAsList()) + .as("Records should match") + .isEqualTo(originalRecords); + + assertThat(resultDf4.schema()).as("Schemas should match").isEqualTo(originalSparkSchema); } @Test @@ -1964,19 +1989,19 @@ public void testRemoveOrphanFilesActionSupport() throws InterruptedException { .location(table.location() + "/metadata") .olderThan(System.currentTimeMillis()) .execute(); - Assert.assertTrue( - "Should not delete any metadata files", Iterables.isEmpty(result1.orphanFileLocations())); + + assertThat(result1.orphanFileLocations()).as("Should not delete any metadata files").isEmpty(); DeleteOrphanFiles.Result result2 = actions.deleteOrphanFiles(table).olderThan(System.currentTimeMillis()).execute(); - Assert.assertEquals( - "Should delete 1 data file", 1, Iterables.size(result2.orphanFileLocations())); + + assertThat(result2.orphanFileLocations()).as("Should delete 1 data file").hasSize(1); Dataset resultDF = spark.read().format("iceberg").load(loadLocation(tableIdentifier)); List actualRecords = resultDF.as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Rows must match", records, actualRecords); + assertThat(actualRecords).as("Rows must match").isEqualTo(records); } @Test @@ -2016,7 +2041,9 @@ public void testFilesTablePartitionId() { .map(r -> (Integer) r.getAs(DataFile.SPEC_ID.name())) .collect(Collectors.toList()); - Assert.assertEquals("Should have two partition specs", ImmutableList.of(spec0, spec1), actual); + 
assertThat(ImmutableList.of(spec0, spec1)) + .as("Should have two partition specs") + .isEqualTo(actual); } @Test @@ -2050,7 +2077,7 @@ public void testAllManifestTableSnapshotFiltering() { table.refresh(); Snapshot snapshot2 = table.currentSnapshot(); - Assert.assertEquals("Should have two manifests", 2, snapshot2.allManifests(table.io()).size()); + assertThat(snapshot2.allManifests(table.io())).as("Should have two manifests").hasSize(2); snapshotIdToManifests.addAll( snapshot2.allManifests(table.io()).stream() .map(manifest -> Pair.of(snapshot2.snapshotId(), manifest)) @@ -2092,7 +2119,7 @@ public void testAllManifestTableSnapshotFiltering() { .sorted(Comparator.comparing(o -> o.get("path").toString())) .collect(Collectors.toList()); - Assert.assertEquals("Manifests table should have 3 manifest rows", 3, actual.size()); + assertThat(actual).as("Manifests table should have 3 manifest rows").hasSize(3); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe( manifestTable.schema().asStruct(), expected.get(i), actual.get(i)); @@ -2101,7 +2128,7 @@ public void testAllManifestTableSnapshotFiltering() { @Test public void testTableWithInt96Timestamp() throws IOException { - File parquetTableDir = temp.newFolder("table_timestamp_int96"); + File parquetTableDir = temp.resolve("table_timestamp_int96").toFile(); String parquetTableLocation = parquetTableDir.toURI().toString(); Schema schema = new Schema( @@ -2154,9 +2181,7 @@ public void testTableWithInt96Timestamp() throws IOException { .load(loadLocation(tableIdentifier)) .select("tmp_col") .collectAsList(); - Assertions.assertThat(actual) - .as("Rows must match") - .containsExactlyInAnyOrderElementsOf(expected); + assertThat(actual).as("Rows must match").containsExactlyInAnyOrderElementsOf(expected); dropTable(tableIdentifier); } } @@ -2256,7 +2281,7 @@ private DeleteFile writeEqDeleteFile(Table table) { try { return FileHelpers.writeDeleteFile( table, - Files.localOutput(temp.newFile()), + 
Files.localOutput(File.createTempFile("junit", null, temp.toFile())), org.apache.iceberg.TestHelpers.Row.of(1), deletes, deleteRowSchema); @@ -2271,16 +2296,14 @@ private long totalSizeInBytes(Iterable dataFiles) { private void assertDataFilePartitions( List dataFiles, List expectedPartitionIds) { - Assert.assertEquals( - "Table should have " + expectedPartitionIds.size() + " data files", - expectedPartitionIds.size(), - dataFiles.size()); + assertThat(dataFiles) + .as("Table should have " + expectedPartitionIds.size() + " data files") + .hasSameSizeAs(expectedPartitionIds); for (int i = 0; i < dataFiles.size(); ++i) { - Assert.assertEquals( - "Data file should have partition of id " + expectedPartitionIds.get(i), - expectedPartitionIds.get(i).intValue(), - dataFiles.get(i).partition().get(0, Integer.class).intValue()); + assertThat(dataFiles.get(i).partition().get(0, Integer.class).intValue()) + .as("Data file should have partition of id " + expectedPartitionIds.get(i)) + .isEqualTo(expectedPartitionIds.get(i).intValue()); } } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSpark.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSpark.java index 37e329a8b97b..7eff93d204e4 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSpark.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSpark.java @@ -18,6 +18,9 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.math.BigDecimal; import java.nio.ByteBuffer; import java.sql.Date; @@ -33,22 +36,20 @@ import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.DecimalType; import org.apache.spark.sql.types.VarcharType; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import 
org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; public class TestIcebergSpark { private static SparkSession spark = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestIcebergSpark.spark = SparkSession.builder().master("local[2]").getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestIcebergSpark.spark; TestIcebergSpark.spark = null; @@ -59,69 +60,64 @@ public static void stopSpark() { public void testRegisterIntegerBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_int_16", DataTypes.IntegerType, 16); List results = spark.sql("SELECT iceberg_bucket_int_16(1)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1), - results.get(0).getInt(0)); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1)); } @Test public void testRegisterShortBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_short_16", DataTypes.ShortType, 16); List results = spark.sql("SELECT iceberg_bucket_short_16(1S)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1), - results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1)); } @Test public void testRegisterByteBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_byte_16", DataTypes.ByteType, 16); List results = spark.sql("SELECT iceberg_bucket_byte_16(1Y)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1), - 
results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.IntegerType.get()).apply(1)); } @Test public void testRegisterLongBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_long_16", DataTypes.LongType, 16); List results = spark.sql("SELECT iceberg_bucket_long_16(1L)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.LongType.get()).apply(1L), results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.LongType.get()).apply(1L)); } @Test public void testRegisterStringBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_string_16", DataTypes.StringType, 16); List results = spark.sql("SELECT iceberg_bucket_string_16('hello')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.StringType.get()).apply("hello"), - results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.StringType.get()).apply("hello")); } @Test public void testRegisterCharBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_char_16", new CharType(5), 16); List results = spark.sql("SELECT iceberg_bucket_char_16('hello')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.StringType.get()).apply("hello"), - results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.StringType.get()).apply("hello")); } @Test public void testRegisterVarCharBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_varchar_16", new VarcharType(5), 16); List results = spark.sql("SELECT 
iceberg_bucket_varchar_16('hello')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.StringType.get()).apply("hello"), - results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.bucket(16).bind(Types.StringType.get()).apply("hello")); } @Test @@ -129,13 +125,12 @@ public void testRegisterDateBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_date_16", DataTypes.DateType, 16); List results = spark.sql("SELECT iceberg_bucket_date_16(DATE '2021-06-30')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo( Transforms.bucket(16) .bind(Types.DateType.get()) - .apply(DateTimeUtils.fromJavaDate(Date.valueOf("2021-06-30"))), - results.get(0).getInt(0)); + .apply(DateTimeUtils.fromJavaDate(Date.valueOf("2021-06-30")))); } @Test @@ -146,42 +141,40 @@ public void testRegisterTimestampBucketUDF() { spark .sql("SELECT iceberg_bucket_timestamp_16(TIMESTAMP '2021-06-30 00:00:00.000')") .collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo( Transforms.bucket(16) .bind(Types.TimestampType.withZone()) .apply( - DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2021-06-30 00:00:00.000"))), - results.get(0).getInt(0)); + DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2021-06-30 00:00:00.000")))); } @Test public void testRegisterBinaryBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_binary_16", DataTypes.BinaryType, 16); List results = spark.sql("SELECT iceberg_bucket_binary_16(X'0020001F')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + 
.isEqualTo( Transforms.bucket(16) .bind(Types.BinaryType.get()) - .apply(ByteBuffer.wrap(new byte[] {0x00, 0x20, 0x00, 0x1F})), - results.get(0).getInt(0)); + .apply(ByteBuffer.wrap(new byte[] {0x00, 0x20, 0x00, 0x1F}))); } @Test public void testRegisterDecimalBucketUDF() { IcebergSpark.registerBucketUDF(spark, "iceberg_bucket_decimal_16", new DecimalType(4, 2), 16); List results = spark.sql("SELECT iceberg_bucket_decimal_16(11.11)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - (int) Transforms.bucket(16).bind(Types.DecimalType.of(4, 2)).apply(new BigDecimal("11.11")), - results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo( + Transforms.bucket(16).bind(Types.DecimalType.of(4, 2)).apply(new BigDecimal("11.11"))); } @Test public void testRegisterBooleanBucketUDF() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> IcebergSpark.registerBucketUDF( spark, "iceberg_bucket_boolean_16", DataTypes.BooleanType, 16)) @@ -191,7 +184,7 @@ public void testRegisterBooleanBucketUDF() { @Test public void testRegisterDoubleBucketUDF() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> IcebergSpark.registerBucketUDF( spark, "iceberg_bucket_double_16", DataTypes.DoubleType, 16)) @@ -201,7 +194,7 @@ public void testRegisterDoubleBucketUDF() { @Test public void testRegisterFloatBucketUDF() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> IcebergSpark.registerBucketUDF( spark, "iceberg_bucket_float_16", DataTypes.FloatType, 16)) @@ -213,37 +206,36 @@ public void testRegisterFloatBucketUDF() { public void testRegisterIntegerTruncateUDF() { IcebergSpark.registerTruncateUDF(spark, "iceberg_truncate_int_4", DataTypes.IntegerType, 4); List results = spark.sql("SELECT iceberg_truncate_int_4(1)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - Transforms.truncate(4).bind(Types.IntegerType.get()).apply(1), 
results.get(0).getInt(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getInt(0)) + .isEqualTo(Transforms.truncate(4).bind(Types.IntegerType.get()).apply(1)); } @Test public void testRegisterLongTruncateUDF() { IcebergSpark.registerTruncateUDF(spark, "iceberg_truncate_long_4", DataTypes.LongType, 4); List results = spark.sql("SELECT iceberg_truncate_long_4(1L)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - Transforms.truncate(4).bind(Types.LongType.get()).apply(1L), results.get(0).getLong(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getLong(0)) + .isEqualTo(Transforms.truncate(4).bind(Types.LongType.get()).apply(1L)); } @Test public void testRegisterDecimalTruncateUDF() { IcebergSpark.registerTruncateUDF(spark, "iceberg_truncate_decimal_4", new DecimalType(4, 2), 4); List results = spark.sql("SELECT iceberg_truncate_decimal_4(11.11)").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - Transforms.truncate(4).bind(Types.DecimalType.of(4, 2)).apply(new BigDecimal("11.11")), - results.get(0).getDecimal(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getDecimal(0)) + .isEqualTo( + Transforms.truncate(4).bind(Types.DecimalType.of(4, 2)).apply(new BigDecimal("11.11"))); } @Test public void testRegisterStringTruncateUDF() { IcebergSpark.registerTruncateUDF(spark, "iceberg_truncate_string_4", DataTypes.StringType, 4); List results = spark.sql("SELECT iceberg_truncate_string_4('hello')").collectAsList(); - Assert.assertEquals(1, results.size()); - Assert.assertEquals( - Transforms.truncate(4).bind(Types.StringType.get()).apply("hello"), - results.get(0).getString(0)); + assertThat(results).hasSize(1); + assertThat(results.get(0).getString(0)) + .isEqualTo(Transforms.truncate(4).bind(Types.StringType.get()).apply("hello")); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestInternalRowWrapper.java 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestInternalRowWrapper.java index 9e75145faff9..a0bc98200281 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestInternalRowWrapper.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestInternalRowWrapper.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Iterator; import org.apache.iceberg.RecordWrapperTest; import org.apache.iceberg.Schema; @@ -29,18 +31,17 @@ import org.apache.iceberg.spark.data.RandomData; import org.apache.iceberg.util.StructLikeWrapper; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; -import org.junit.Ignore; +import org.junit.jupiter.api.Disabled; public class TestInternalRowWrapper extends RecordWrapperTest { - @Ignore + @Disabled @Override public void testTimestampWithoutZone() { // Spark does not support timestamp without zone. } - @Ignore + @Disabled @Override public void testTime() { // Spark does not support time fields. 
@@ -61,8 +62,8 @@ protected void generateAndValidate(Schema schema, AssertMethod assertMethod) { StructLikeWrapper actualWrapper = StructLikeWrapper.forType(schema.asStruct()); StructLikeWrapper expectedWrapper = StructLikeWrapper.forType(schema.asStruct()); for (int i = 0; i < numRecords; i++) { - Assert.assertTrue("Should have more records", actual.hasNext()); - Assert.assertTrue("Should have more InternalRow", expected.hasNext()); + assertThat(actual).as("Should have more records").hasNext(); + assertThat(expected).as("Should have more InternalRow").hasNext(); StructLike recordStructLike = recordWrapper.wrap(actual.next()); StructLike rowStructLike = rowWrapper.wrap(expected.next()); @@ -73,7 +74,7 @@ protected void generateAndValidate(Schema schema, AssertMethod assertMethod) { expectedWrapper.set(rowStructLike)); } - Assert.assertFalse("Shouldn't have more record", actual.hasNext()); - Assert.assertFalse("Shouldn't have more InternalRow", expected.hasNext()); + assertThat(actual).as("Shouldn't have more record").isExhausted(); + assertThat(expected).as("Shouldn't have more InternalRow").isExhausted(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java index 9075257fa9f1..0696c3291e36 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.Base64; import java.util.List; import java.util.Map; @@ -41,19 +42,18 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import 
org.apache.iceberg.spark.SparkCatalogConfig; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; +import org.apache.iceberg.spark.TestBaseWithCatalog; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.junit.After; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -public class TestMetadataTableReadableMetrics extends SparkTestBaseWithCatalog { +public class TestMetadataTableReadableMetrics extends TestBaseWithCatalog { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static final Types.StructType LEAF_STRUCT_TYPE = Types.StructType.of( @@ -124,8 +124,7 @@ private Table createPrimitiveTable() throws IOException { createPrimitiveRecord( false, 2, 2L, Float.NaN, 2.0D, new BigDecimal("2.00"), "2", null, null)); - DataFile dataFile = - FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records); + DataFile dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.toFile()), records); table.newAppend().appendFile(dataFile).commit(); return table; } @@ -143,13 +142,12 @@ private Pair createNestedTable() throws IOException { createNestedRecord(0L, 0.0), createNestedRecord(1L, Double.NaN), createNestedRecord(null, null)); - DataFile dataFile = - FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records); + DataFile dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.toFile()), records); table.newAppend().appendFile(dataFile).commit(); return Pair.of(table, dataFile); } - @After + @AfterEach public void dropTable() { sql("DROP TABLE %s", tableName); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestPathIdentifier.java 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestPathIdentifier.java index 5baf6071233d..bb026b2ab2da 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestPathIdentifier.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestPathIdentifier.java @@ -19,9 +19,11 @@ package org.apache.iceberg.spark.source; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.Schema; import org.apache.iceberg.hadoop.HadoopTableOperations; @@ -29,40 +31,37 @@ import org.apache.iceberg.spark.PathIdentifier; import org.apache.iceberg.spark.SparkCatalog; import org.apache.iceberg.spark.SparkSchemaUtil; -import org.apache.iceberg.spark.SparkTestBase; +import org.apache.iceberg.spark.TestBase; import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -public class TestPathIdentifier extends SparkTestBase { +public class TestPathIdentifier extends TestBase { private static final Schema SCHEMA = new Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private File tableLocation; 
private PathIdentifier identifier; private SparkCatalog sparkCatalog; - @Before + @BeforeEach public void before() throws IOException { - tableLocation = temp.newFolder(); + tableLocation = temp.toFile(); identifier = new PathIdentifier(tableLocation.getAbsolutePath()); sparkCatalog = new SparkCatalog(); sparkCatalog.initialize("test", new CaseInsensitiveStringMap(ImmutableMap.of())); } - @After + @AfterEach public void after() { tableLocation.delete(); sparkCatalog = null; @@ -75,12 +74,11 @@ public void testPathIdentifier() throws TableAlreadyExistsException, NoSuchTable sparkCatalog.createTable( identifier, SparkSchemaUtil.convert(SCHEMA), new Transform[0], ImmutableMap.of()); - Assert.assertEquals(table.table().location(), tableLocation.getAbsolutePath()); - Assertions.assertThat(table.table()).isInstanceOf(BaseTable.class); - Assertions.assertThat(((BaseTable) table.table()).operations()) - .isInstanceOf(HadoopTableOperations.class); + assertThat(tableLocation.getAbsolutePath()).isEqualTo(table.table().location()); + assertThat(table.table()).isInstanceOf(BaseTable.class); + assertThat(((BaseTable) table.table()).operations()).isInstanceOf(HadoopTableOperations.class); - Assert.assertEquals(sparkCatalog.loadTable(identifier), table); - Assert.assertTrue(sparkCatalog.dropTable(identifier)); + assertThat(table).isEqualTo(sparkCatalog.loadTable(identifier)); + assertThat(sparkCatalog.dropTable(identifier)).isTrue(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestReadProjection.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestReadProjection.java index eecc405b1a09..81a46cd68122 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestReadProjection.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestReadProjection.java @@ -19,6 +19,9 @@ package org.apache.iceberg.spark.source; import static org.apache.avro.Schema.Type.UNION; +import static 
org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assertions.within; import java.io.IOException; import java.util.List; @@ -33,8 +36,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Comparators; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -64,12 +65,14 @@ public void testFullProjection() throws Exception { Record projected = writeAndRead("full_projection", schema, schema, record); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.getField("data")); - Assert.assertEquals("Should contain the correct data value", 0, cmp); + + assertThat(cmp).as("Should contain the correct data value").isEqualTo(0); } @Test @@ -94,8 +97,10 @@ public void testReorderedFullProjection() throws Exception { Record projected = writeAndRead("reordered_full_projection", schema, reordered, record); - Assert.assertEquals("Should contain the correct 0 value", "test", projected.get(0).toString()); - Assert.assertEquals("Should contain the correct 1 value", 34L, projected.get(1)); + assertThat(projected.get(0).toString()) + .as("Should contain the correct 0 value") + .isEqualTo("test"); + assertThat(projected.get(1)).as("Should contain the correct 1 value").isEqualTo(34L); } @Test @@ -121,9 +126,11 @@ public void testReorderedProjection() throws Exception { Record projected = writeAndRead("reordered_projection", schema, reordered, record); - Assert.assertNull("Should contain the correct 0 value", projected.get(0)); - Assert.assertEquals("Should contain the 
correct 1 value", "test", projected.get(1).toString()); - Assert.assertNull("Should contain the correct 2 value", projected.get(2)); + assertThat(projected.get(0)).as("Should contain the correct 0 value").isNull(); + assertThat(projected.get(1).toString()) + .as("Should contain the correct 1 value") + .isEqualTo("test"); + assertThat(projected.get(2)).as("Should contain the correct 2 value").isNull(); } @Test @@ -139,10 +146,9 @@ public void testEmptyProjection() throws Exception { Record projected = writeAndRead("empty_projection", schema, schema.select(), record); - Assert.assertNotNull("Should read a non-null record", projected); + assertThat(projected).as("Should read a non-null record").isNotNull(); // this is expected because there are no values - Assertions.assertThatThrownBy(() -> projected.get(0)) - .isInstanceOf(ArrayIndexOutOfBoundsException.class); + assertThatThrownBy(() -> projected.get(0)).isInstanceOf(ArrayIndexOutOfBoundsException.class); } @Test @@ -159,18 +165,19 @@ public void testBasicProjection() throws Exception { Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); - Assert.assertNull("Should not project data", projected.getField("data")); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); + assertThat(projected.getField("data")).as("Should not project data").isNull(); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); Schema dataOnly = new Schema(Types.NestedField.optional(1, "data", Types.StringType.get())); projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); + assertThat(projected.getField("id")).as("Should not project id").isNull(); - Assert.assertNull("Should not project id", projected.getField("id")); int cmp = Comparators.charSequences().compare("test", (CharSequence) 
projected.getField("data")); - Assert.assertEquals("Should contain the correct data value", 0, cmp); + assertThat(cmp).as("Should contain the correct data value").isEqualTo(0); } @Test @@ -190,12 +197,13 @@ public void testRename() throws Exception { Types.NestedField.optional(1, "renamed", Types.StringType.get())); Record projected = writeAndRead("project_and_rename", writeSchema, readSchema, record); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.getField("renamed")); - Assert.assertEquals("Should contain the correct data/renamed value", 0, cmp); + assertThat(cmp).as("Should contain the correct data/renamed value").isEqualTo(0); } @Test @@ -221,9 +229,10 @@ public void testNestedStructProjection() throws Exception { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Record projectedLocation = (Record) projected.getField("location"); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); - Assert.assertNull("Should not project location", projectedLocation); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); + assertThat(projectedLocation).as("Should not project location").isNull(); Schema latOnly = new Schema( @@ -234,14 +243,12 @@ public void testNestedStructProjection() throws Exception { projected = writeAndRead("latitude_only", writeSchema, latOnly, record); projectedLocation = (Record) projected.getField("location"); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project location", projected.getField("location")); - Assert.assertNull("Should not project longitude", projectedLocation.getField("long")); - Assert.assertEquals( - "Should 
project latitude", - 52.995143f, - (float) projectedLocation.getField("lat"), - 0.000001f); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("location")).as("Should project location").isNotNull(); + assertThat(projectedLocation.getField("long")).as("Should not project longitude").isNull(); + assertThat((float) projectedLocation.getField("lat")) + .as("Should project latitude") + .isCloseTo(52.995143f, within(0.000001f)); Schema longOnly = new Schema( @@ -252,30 +259,26 @@ public void testNestedStructProjection() throws Exception { projected = writeAndRead("longitude_only", writeSchema, longOnly, record); projectedLocation = (Record) projected.getField("location"); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project location", projected.getField("location")); - Assert.assertNull("Should not project latitutde", projectedLocation.getField("lat")); - Assert.assertEquals( - "Should project longitude", - -1.539054f, - (float) projectedLocation.getField("long"), - 0.000001f); + + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("location")).as("Should project location").isNotNull(); + assertThat(projectedLocation.getField("lat")).as("Should not project latitude").isNull(); + assertThat((float) projectedLocation.getField("long")) + .as("Should project longitude") + .isCloseTo(-1.539054f, within(0.000001f)); Schema locationOnly = writeSchema.select("location"); projected = writeAndRead("location_only", writeSchema, locationOnly, record); projectedLocation = (Record) projected.getField("location"); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project location", projected.getField("location")); - Assert.assertEquals( - "Should project latitude", - 52.995143f, - (float) projectedLocation.getField("lat"), - 0.000001f); - Assert.assertEquals( - 
"Should project longitude", - -1.539054f, - (float) projectedLocation.getField("long"), - 0.000001f); + + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("location")).as("Should project location").isNotNull(); + assertThat((float) projectedLocation.getField("lat")) + .as("Should project latitude") + .isCloseTo(52.995143f, within(0.000001f)); + assertThat((float) projectedLocation.getField("long")) + .as("Should project longitude") + .isCloseTo(-1.539054f, within(0.000001f)); } @Test @@ -297,33 +300,31 @@ public void testMapProjection() throws IOException { Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); - Assert.assertNull("Should not project properties map", projected.getField("properties")); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); + assertThat(projected.getField("properties")).as("Should not project properties map").isNull(); Schema keyOnly = writeSchema.select("properties.key"); projected = writeAndRead("key_only", writeSchema, keyOnly, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals( - "Should project entire map", - properties, - toStringMap((Map) projected.getField("properties"))); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(toStringMap((Map) projected.getField("properties"))) + .as("Should project entire map") + .isEqualTo(properties); Schema valueOnly = writeSchema.select("properties.value"); projected = writeAndRead("value_only", writeSchema, valueOnly, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals( - "Should project entire map", - properties, - toStringMap((Map) 
projected.getField("properties"))); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(toStringMap((Map) projected.getField("properties"))) + .as("Should project entire map") + .isEqualTo(properties); Schema mapOnly = writeSchema.select("properties"); projected = writeAndRead("map_only", writeSchema, mapOnly, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals( - "Should project entire map", - properties, - toStringMap((Map) projected.getField("properties"))); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(toStringMap((Map) projected.getField("properties"))) + .as("Should project entire map") + .isEqualTo(properties); } private Map toStringMap(Map map) { @@ -367,51 +368,63 @@ public void testMapOfStructsProjection() throws IOException { Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); - Assert.assertNull("Should not project locations map", projected.getField("locations")); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); + assertThat(projected.getField("locations")).as("Should not project locations map").isNull(); projected = writeAndRead("all_locations", writeSchema, writeSchema.select("locations"), record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals( - "Should project locations map", - record.getField("locations"), - toStringMap((Map) projected.getField("locations"))); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(toStringMap((Map) projected.getField("locations"))) + .as("Should project locations map") + .isEqualTo(record.getField("locations")); projected = 
writeAndRead("lat_only", writeSchema, writeSchema.select("locations.lat"), record); - Assert.assertNull("Should not project id", projected.getField("id")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + Map locations = toStringMap((Map) projected.getField("locations")); - Assert.assertNotNull("Should project locations map", locations); - Assert.assertEquals( - "Should contain L1 and L2", Sets.newHashSet("L1", "L2"), locations.keySet()); + assertThat(locations).as("Should project locations map").isNotNull(); + assertThat(locations.keySet()) + .as("Should contain L1 and L2") + .isEqualTo(Sets.newHashSet("L1", "L2")); + Record projectedL1 = (Record) locations.get("L1"); - Assert.assertNotNull("L1 should not be null", projectedL1); - Assert.assertEquals( - "L1 should contain lat", 53.992811f, (float) projectedL1.getField("lat"), 0.000001); - Assert.assertNull("L1 should not contain long", projectedL1.getField("long")); + assertThat(projectedL1).as("L1 should not be null").isNotNull(); + assertThat((float) projectedL1.getField("lat")) + .as("L1 should contain lat") + .isCloseTo(53.992811f, within(0.000001f)); + assertThat(projectedL1.getField("long")).as("L1 should not contain long").isNull(); + Record projectedL2 = (Record) locations.get("L2"); - Assert.assertNotNull("L2 should not be null", projectedL2); - Assert.assertEquals( - "L2 should contain lat", 52.995143f, (float) projectedL2.getField("lat"), 0.000001); - Assert.assertNull("L2 should not contain long", projectedL2.getField("long")); + assertThat(projectedL2).as("L2 should not be null").isNotNull(); + assertThat((float) projectedL2.getField("lat")) + .as("L2 should contain lat") + .isCloseTo(52.995143f, within(0.000001f)); + assertThat(projectedL2.getField("long")).as("L2 should not contain long").isNull(); projected = writeAndRead("long_only", writeSchema, writeSchema.select("locations.long"), record); - Assert.assertNull("Should not project id", projected.getField("id")); + 
assertThat(projected.getField("id")).as("Should not project id").isNull(); + locations = toStringMap((Map) projected.getField("locations")); - Assert.assertNotNull("Should project locations map", locations); - Assert.assertEquals( - "Should contain L1 and L2", Sets.newHashSet("L1", "L2"), locations.keySet()); + assertThat(locations).as("Should project locations map").isNotNull(); + assertThat(locations.keySet()) + .as("Should contain L1 and L2") + .isEqualTo(Sets.newHashSet("L1", "L2")); + projectedL1 = (Record) locations.get("L1"); - Assert.assertNotNull("L1 should not be null", projectedL1); - Assert.assertNull("L1 should not contain lat", projectedL1.getField("lat")); - Assert.assertEquals( - "L1 should contain long", -1.542616f, (float) projectedL1.getField("long"), 0.000001); + assertThat(projectedL1).as("L1 should not be null").isNotNull(); + assertThat(projectedL1.getField("lat")).as("L1 should not contain lat").isNull(); + assertThat((float) projectedL1.getField("long")) + .as("L1 should contain long") + .isCloseTo(-1.542616f, within(0.000001f)); + projectedL2 = (Record) locations.get("L2"); - Assert.assertNotNull("L2 should not be null", projectedL2); - Assert.assertNull("L2 should not contain lat", projectedL2.getField("lat")); - Assert.assertEquals( - "L2 should contain long", -1.539054f, (float) projectedL2.getField("long"), 0.000001); + assertThat(projectedL2).as("L2 should not be null").isNotNull(); + assertThat(projectedL2.getField("lat")).as("L2 should not contain lat").isNull(); + assertThat((float) projectedL2.getField("long")) + .as("L2 should contain long") + .isCloseTo(-1.539054f, within(0.000001f)); Schema latitiudeRenamed = new Schema( @@ -426,29 +439,28 @@ public void testMapOfStructsProjection() throws IOException { Types.NestedField.required(1, "latitude", Types.FloatType.get()))))); projected = writeAndRead("latitude_renamed", writeSchema, latitiudeRenamed, record); - Assert.assertNull("Should not project id", projected.getField("id")); + 
assertThat(projected.getField("id")).as("Should not project id").isNull(); locations = toStringMap((Map) projected.getField("locations")); - Assert.assertNotNull("Should project locations map", locations); - Assert.assertEquals( - "Should contain L1 and L2", Sets.newHashSet("L1", "L2"), locations.keySet()); + assertThat(locations).as("Should project locations map").isNotNull(); + assertThat(locations.keySet()) + .as("Should contain L1 and L2") + .isEqualTo(Sets.newHashSet("L1", "L2")); + projectedL1 = (Record) locations.get("L1"); - Assert.assertNotNull("L1 should not be null", projectedL1); - Assert.assertEquals( - "L1 should contain latitude", - 53.992811f, - (float) projectedL1.getField("latitude"), - 0.000001); - Assert.assertNull("L1 should not contain lat", projectedL1.getField("lat")); - Assert.assertNull("L1 should not contain long", projectedL1.getField("long")); + assertThat(projectedL1).as("L1 should not be null").isNotNull(); + assertThat((float) projectedL1.getField("latitude")) + .as("L1 should contain latitude") + .isCloseTo(53.992811f, within(0.000001f)); + assertThat(projectedL1.getField("lat")).as("L1 should not contain lat").isNull(); + assertThat(projectedL1.getField("long")).as("L1 should not contain long").isNull(); + projectedL2 = (Record) locations.get("L2"); - Assert.assertNotNull("L2 should not be null", projectedL2); - Assert.assertEquals( - "L2 should contain latitude", - 52.995143f, - (float) projectedL2.getField("latitude"), - 0.000001); - Assert.assertNull("L2 should not contain lat", projectedL2.getField("lat")); - Assert.assertNull("L2 should not contain long", projectedL2.getField("long")); + assertThat(projectedL2).as("L2 should not be null").isNotNull(); + assertThat((float) projectedL2.getField("latitude")) + .as("L2 should contain latitude") + .isCloseTo(52.995143f, within(0.000001f)); + assertThat(projectedL2.getField("lat")).as("L2 should not contain lat").isNull(); + assertThat(projectedL2.getField("long")).as("L2 should not 
contain long").isNull(); } @Test @@ -468,19 +480,20 @@ public void testListProjection() throws IOException { Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); - Assert.assertNull("Should not project values list", projected.getField("values")); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); + assertThat(projected.getField("values")).as("Should not project values list").isNull(); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals("Should project entire list", values, projected.getField("values")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("values")).as("Should project entire list").isEqualTo(values); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals("Should project entire list", values, projected.getField("values")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("values")).as("Should project entire list").isEqualTo(values); } @Test @@ -511,38 +524,46 @@ public void testListOfStructsProjection() throws IOException { Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get())); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Assert.assertEquals( - "Should contain the correct id value", 34L, (long) projected.getField("id")); - Assert.assertNull("Should not project points list", 
projected.getField("points")); + assertThat((long) projected.getField("id")) + .as("Should contain the correct id value") + .isEqualTo(34L); + assertThat(projected.getField("points")).as("Should not project points list").isNull(); projected = writeAndRead("all_points", writeSchema, writeSchema.select("points"), record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertEquals( - "Should project points list", record.getField("points"), projected.getField("points")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("points")) + .as("Should project points list") + .isEqualTo(record.getField("points")); projected = writeAndRead("x_only", writeSchema, writeSchema.select("points.x"), record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project points list", projected.getField("points")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("points")).as("Should project points list").isNotNull(); + List points = (List) projected.getField("points"); - Assert.assertEquals("Should read 2 points", 2, points.size()); + assertThat(points).as("Should read 2 points").hasSize(2); + Record projectedP1 = points.get(0); - Assert.assertEquals("Should project x", 1, (int) projectedP1.getField("x")); - Assert.assertNull("Should not project y", projectedP1.getField("y")); + assertThat((int) projectedP1.getField("x")).as("Should project x").isEqualTo(1); + assertThat(projectedP1.getField("y")).as("Should not project y").isNull(); + Record projectedP2 = points.get(1); - Assert.assertEquals("Should project x", 3, (int) projectedP2.getField("x")); - Assert.assertNull("Should not project y", projectedP2.getField("y")); + assertThat((int) projectedP2.getField("x")).as("Should project x").isEqualTo(3); + assertThat(projectedP2.getField("y")).as("Should not project y").isNull(); projected = 
writeAndRead("y_only", writeSchema, writeSchema.select("points.y"), record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project points list", projected.getField("points")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("points")).as("Should project points list").isNotNull(); + points = (List) projected.getField("points"); - Assert.assertEquals("Should read 2 points", 2, points.size()); + assertThat(points).as("Should read 2 points").hasSize(2); + projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.getField("x")); - Assert.assertEquals("Should project y", 2, (int) projectedP1.getField("y")); + assertThat(projectedP1.getField("x")).as("Should not project x").isNull(); + assertThat((int) projectedP1.getField("y")).as("Should project y").isEqualTo(2); + projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.getField("x")); - Assert.assertNull("Should project null y", projectedP2.getField("y")); + assertThat(projectedP2.getField("x")).as("Should not project x").isNull(); + assertThat(projectedP2.getField("y")).as("Should project null y").isNull(); Schema yRenamed = new Schema( @@ -555,18 +576,21 @@ public void testListOfStructsProjection() throws IOException { Types.NestedField.optional(18, "z", Types.IntegerType.get()))))); projected = writeAndRead("y_renamed", writeSchema, yRenamed, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project points list", projected.getField("points")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("points")).as("Should project points list").isNotNull(); + points = (List) projected.getField("points"); - Assert.assertEquals("Should read 2 points", 2, points.size()); + assertThat(points).as("Should read 2 points").hasSize(2); + 
projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.getField("x")); - Assert.assertNull("Should not project y", projectedP1.getField("y")); - Assert.assertEquals("Should project z", 2, (int) projectedP1.getField("z")); + assertThat(projectedP1.getField("x")).as("Should not project x").isNull(); + assertThat(projectedP1.getField("y")).as("Should not project y").isNull(); + assertThat((int) projectedP1.getField("z")).as("Should project z").isEqualTo(2); + projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.getField("x")); - Assert.assertNull("Should not project y", projectedP2.getField("y")); - Assert.assertNull("Should project null z", projectedP2.getField("z")); + assertThat(projectedP2.getField("x")).as("Should not project x").isNull(); + assertThat(projectedP2.getField("y")).as("Should not project y").isNull(); + assertThat(projectedP2.getField("z")).as("Should project null z").isNull(); Schema zAdded = new Schema( @@ -581,18 +605,21 @@ public void testListOfStructsProjection() throws IOException { Types.NestedField.optional(20, "z", Types.IntegerType.get()))))); projected = writeAndRead("z_added", writeSchema, zAdded, record); - Assert.assertNull("Should not project id", projected.getField("id")); - Assert.assertNotNull("Should project points list", projected.getField("points")); + assertThat(projected.getField("id")).as("Should not project id").isNull(); + assertThat(projected.getField("points")).as("Should project points list").isNotNull(); + points = (List) projected.getField("points"); - Assert.assertEquals("Should read 2 points", 2, points.size()); + assertThat(points).as("Should read 2 points").hasSize(2); + projectedP1 = points.get(0); - Assert.assertEquals("Should project x", 1, (int) projectedP1.getField("x")); - Assert.assertEquals("Should project y", 2, (int) projectedP1.getField("y")); - Assert.assertNull("Should contain null z", projectedP1.getField("z")); + assertThat((int) 
projectedP1.getField("x")).as("Should project x").isEqualTo(1); + assertThat((int) projectedP1.getField("y")).as("Should project y").isEqualTo(2); + assertThat(projectedP1.getField("z")).as("Should contain null z").isNull(); + projectedP2 = points.get(1); - Assert.assertEquals("Should project x", 3, (int) projectedP2.getField("x")); - Assert.assertNull("Should project null y", projectedP2.getField("y")); - Assert.assertNull("Should contain null z", projectedP2.getField("z")); + assertThat((int) projectedP2.getField("x")).as("Should project x").isEqualTo(3); + assertThat(projectedP2.getField("y")).as("Should project null y").isNull(); + assertThat(projectedP2.getField("z")).as("Should contain null z").isNull(); } private static org.apache.avro.Schema fromOption(org.apache.avro.Schema schema) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkAggregates.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkAggregates.java index e2d6f744f5a5..06b68b77e680 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkAggregates.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkAggregates.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Map; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; @@ -29,8 +31,7 @@ import org.apache.spark.sql.connector.expressions.aggregate.CountStar; import org.apache.spark.sql.connector.expressions.aggregate.Max; import org.apache.spark.sql.connector.expressions.aggregate.Min; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSparkAggregates { @@ -50,27 +51,34 @@ public void testAggregates() { Max max = new Max(namedReference); Expression expectedMax = Expressions.max(unquoted); Expression actualMax = SparkAggregates.convert(max); 
- Assert.assertEquals("Max must match", expectedMax.toString(), actualMax.toString()); + assertThat(String.valueOf(actualMax)) + .as("Max must match") + .isEqualTo(expectedMax.toString()); Min min = new Min(namedReference); Expression expectedMin = Expressions.min(unquoted); Expression actualMin = SparkAggregates.convert(min); - Assert.assertEquals("Min must match", expectedMin.toString(), actualMin.toString()); + assertThat(String.valueOf(actualMin)) + .as("Min must match") + .isEqualTo(expectedMin.toString()); Count count = new Count(namedReference, false); Expression expectedCount = Expressions.count(unquoted); Expression actualCount = SparkAggregates.convert(count); - Assert.assertEquals("Count must match", expectedCount.toString(), actualCount.toString()); + assertThat(String.valueOf(actualCount)) + .as("Count must match") + .isEqualTo(expectedCount.toString()); Count countDistinct = new Count(namedReference, true); Expression convertedCountDistinct = SparkAggregates.convert(countDistinct); - Assert.assertNull("Count Distinct is converted to null", convertedCountDistinct); + assertThat(convertedCountDistinct).as("Count Distinct is converted to null").isNull(); CountStar countStar = new CountStar(); Expression expectedCountStar = Expressions.countStar(); Expression actualCountStar = SparkAggregates.convert(countStar); - Assert.assertEquals( - "CountStar must match", expectedCountStar.toString(), actualCountStar.toString()); + assertThat(String.valueOf(actualCountStar)) + .as("CountStar must match") + .isEqualTo(expectedCountStar.toString()); }); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogCacheExpiration.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogCacheExpiration.java index 3d668197fd51..9ff3de74f6fb 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogCacheExpiration.java +++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogCacheExpiration.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Map; import org.apache.iceberg.CachingCatalog; import org.apache.iceberg.CatalogProperties; @@ -25,13 +27,12 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.spark.SparkCatalog; import org.apache.iceberg.spark.SparkSessionCatalog; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; +import org.apache.iceberg.spark.TestBaseWithCatalog; import org.apache.spark.sql.connector.catalog.TableCatalog; -import org.assertj.core.api.Assertions; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; -public class TestSparkCatalogCacheExpiration extends SparkTestBaseWithCatalog { +public class TestSparkCatalogCacheExpiration extends TestBaseWithCatalog { private static final String sessionCatalogName = "spark_catalog"; private static final String sessionCatalogImpl = SparkSessionCatalog.class.getName(); @@ -57,7 +58,7 @@ private static String asSqlConfCatalogKeyFor(String catalog, String configKey) { // Add more catalogs to the spark session, so we only need to start spark one time for multiple // different catalog configuration tests. - @BeforeClass + @BeforeAll public static void beforeClass() { // Catalog - expiration_disabled: Catalog with caching on and expiration disabled. 
ImmutableMap.of( @@ -93,18 +94,18 @@ public TestSparkCatalogCacheExpiration() { @Test public void testSparkSessionCatalogWithExpirationEnabled() { SparkSessionCatalog sparkCatalog = sparkSessionCatalog(); - Assertions.assertThat(sparkCatalog) + assertThat(sparkCatalog) .extracting("icebergCatalog") .extracting("cacheEnabled") .isEqualTo(true); - Assertions.assertThat(sparkCatalog) + assertThat(sparkCatalog) .extracting("icebergCatalog") .extracting("icebergCatalog") .isInstanceOfSatisfying( Catalog.class, icebergCatalog -> { - Assertions.assertThat(icebergCatalog) + assertThat(icebergCatalog) .isExactlyInstanceOf(CachingCatalog.class) .extracting("expirationIntervalMillis") .isEqualTo(3000L); @@ -114,30 +115,27 @@ public void testSparkSessionCatalogWithExpirationEnabled() { @Test public void testCacheEnabledAndExpirationDisabled() { SparkCatalog sparkCatalog = getSparkCatalog("expiration_disabled"); - Assertions.assertThat(sparkCatalog).extracting("cacheEnabled").isEqualTo(true); + assertThat(sparkCatalog).extracting("cacheEnabled").isEqualTo(true); - Assertions.assertThat(sparkCatalog) + assertThat(sparkCatalog) .extracting("icebergCatalog") .isInstanceOfSatisfying( CachingCatalog.class, icebergCatalog -> { - Assertions.assertThat(icebergCatalog) - .extracting("expirationIntervalMillis") - .isEqualTo(-1L); + assertThat(icebergCatalog).extracting("expirationIntervalMillis").isEqualTo(-1L); }); } @Test public void testCacheDisabledImplicitly() { SparkCatalog sparkCatalog = getSparkCatalog("cache_disabled_implicitly"); - Assertions.assertThat(sparkCatalog).extracting("cacheEnabled").isEqualTo(false); + assertThat(sparkCatalog).extracting("cacheEnabled").isEqualTo(false); - Assertions.assertThat(sparkCatalog) + assertThat(sparkCatalog) .extracting("icebergCatalog") .isInstanceOfSatisfying( Catalog.class, - icebergCatalog -> - Assertions.assertThat(icebergCatalog).isNotInstanceOf(CachingCatalog.class)); + icebergCatalog -> 
assertThat(icebergCatalog).isNotInstanceOf(CachingCatalog.class)); } private SparkSessionCatalog sparkSessionCatalog() { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataFile.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataFile.java index b894d32326dc..f0a1a28509e1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataFile.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataFile.java @@ -25,6 +25,7 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -70,12 +71,11 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestSparkDataFile { @@ -119,13 +119,13 @@ public class TestSparkDataFile { private static SparkSession spark; private static JavaSparkContext sparkContext = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestSparkDataFile.spark = SparkSession.builder().master("local[2]").getOrCreate(); TestSparkDataFile.sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext()); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestSparkDataFile.spark; TestSparkDataFile.spark = null; @@ -133,12 +133,12 @@ public static void stopSpark() { currentSpark.stop(); } - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; 
private String tableLocation = null; - @Before + @BeforeEach public void setupTableLocation() throws Exception { - File tableDir = temp.newFolder(); + File tableDir = temp.toFile(); this.tableLocation = tableDir.toURI().toString(); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadMetrics.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadMetrics.java index fea5a1d3e1c3..4ee2d9e4e558 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadMetrics.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadMetrics.java @@ -18,24 +18,24 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; import static scala.collection.JavaConverters.seqAsJavaListConverter; import java.util.List; import java.util.Map; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; +import org.apache.iceberg.spark.TestBaseWithCatalog; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.execution.SparkPlan; import org.apache.spark.sql.execution.metric.SQLMetric; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; import scala.collection.JavaConverters; -public class TestSparkReadMetrics extends SparkTestBaseWithCatalog { +public class TestSparkReadMetrics extends TestBaseWithCatalog { - @After + @AfterEach public void removeTables() { sql("DROP TABLE IF EXISTS %s", tableName); } @@ -57,30 +57,30 @@ public void testReadMetricsForV1Table() throws NoSuchTableException { Map metricsMap = JavaConverters.mapAsJavaMapConverter(sparkPlans.get(0).metrics()).asJava(); // Common - Assertions.assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); + 
assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); // data manifests - Assertions.assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(2); - Assertions.assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(2); - Assertions.assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(2); + assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(2); + assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); // data files - Assertions.assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); + assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); // delete manifests - Assertions.assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); // delete files - Assertions.assertThat(metricsMap.get("totalDeleteFileSize").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(0); - 
Assertions.assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteFileSize").value()).isEqualTo(0); + assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); } @Test @@ -101,30 +101,30 @@ public void testReadMetricsForV2Table() throws NoSuchTableException { JavaConverters.mapAsJavaMapConverter(sparkPlans.get(0).metrics()).asJava(); // Common - Assertions.assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); + assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); // data manifests - Assertions.assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(2); - Assertions.assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(2); - Assertions.assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(2); + assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(2); + assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); // data files - Assertions.assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); + assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); // delete manifests - 
Assertions.assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); // delete files - Assertions.assertThat(metricsMap.get("totalDeleteFileSize").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteFileSize").value()).isEqualTo(0); + assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); } @Test @@ -152,29 +152,29 @@ public void testDeleteMetrics() throws NoSuchTableException { JavaConverters.mapAsJavaMapConverter(sparkPlans.get(0).metrics()).asJava(); // Common - Assertions.assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); + assertThat(metricsMap.get("totalPlanningDuration").value()).isNotEqualTo(0); // data manifests - Assertions.assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(1); - 
Assertions.assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDataManifest").value()).isEqualTo(1); + assertThat(metricsMap.get("scannedDataManifests").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDataManifests").value()).isEqualTo(0); // data files - Assertions.assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); + assertThat(metricsMap.get("resultDataFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDataFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDataFileSize").value()).isNotEqualTo(0); // delete manifests - Assertions.assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteManifests").value()).isEqualTo(1); + assertThat(metricsMap.get("scannedDeleteManifests").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDeleteManifests").value()).isEqualTo(0); // delete files - Assertions.assertThat(metricsMap.get("totalDeleteFileSize").value()).isNotEqualTo(0); - Assertions.assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); - Assertions.assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(1); - Assertions.assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("totalDeleteFileSize").value()).isNotEqualTo(0); + assertThat(metricsMap.get("resultDeleteFiles").value()).isEqualTo(1); + 
assertThat(metricsMap.get("equalityDeleteFiles").value()).isEqualTo(0); + assertThat(metricsMap.get("indexedDeleteFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("positionalDeleteFiles").value()).isEqualTo(1); + assertThat(metricsMap.get("skippedDeleteFiles").value()).isEqualTo(0); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkStagedScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkStagedScan.java index 241293f367aa..3305b9e91384 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkStagedScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkStagedScan.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import java.io.IOException; import java.util.List; import java.util.Map; @@ -26,30 +28,28 @@ import org.apache.iceberg.Table; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.spark.CatalogTestBase; import org.apache.iceberg.spark.ScanTaskSetManager; -import org.apache.iceberg.spark.SparkCatalogTestBase; import org.apache.iceberg.spark.SparkReadOptions; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; -import org.junit.After; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.TestTemplate; -public class TestSparkStagedScan extends SparkCatalogTestBase { +public class TestSparkStagedScan extends CatalogTestBase { public TestSparkStagedScan( String catalogName, String implementation, Map config) { super(catalogName, implementation, config); } - @After + @AfterEach public void removeTables() { sql("DROP TABLE IF 
EXISTS %s", tableName); } - @Test + @TestTemplate public void testTaskSetLoading() throws NoSuchTableException, IOException { sql("CREATE TABLE %s (id INT, data STRING) USING iceberg", tableName); @@ -59,7 +59,7 @@ public void testTaskSetLoading() throws NoSuchTableException, IOException { df.writeTo(tableName).append(); Table table = validationCatalog.loadTable(tableIdent); - Assert.assertEquals("Should produce 1 snapshot", 1, Iterables.size(table.snapshots())); + assertThat(table.snapshots()).as("Should produce 1 snapshot").hasSize(1); try (CloseableIterable fileScanTasks = table.newScan().planFiles()) { ScanTaskSetManager taskSetManager = ScanTaskSetManager.get(); @@ -84,7 +84,7 @@ public void testTaskSetLoading() throws NoSuchTableException, IOException { sql("SELECT * FROM %s ORDER BY id", tableName)); } - @Test + @TestTemplate public void testTaskSetPlanning() throws NoSuchTableException, IOException { sql("CREATE TABLE %s (id INT, data STRING) USING iceberg", tableName); @@ -95,7 +95,7 @@ public void testTaskSetPlanning() throws NoSuchTableException, IOException { df.coalesce(1).writeTo(tableName).append(); Table table = validationCatalog.loadTable(tableIdent); - Assert.assertEquals("Should produce 2 snapshots", 2, Iterables.size(table.snapshots())); + assertThat(table.snapshots()).as("Should produce 2 snapshots").hasSize(2); try (CloseableIterable fileScanTasks = table.newScan().planFiles()) { ScanTaskSetManager taskSetManager = ScanTaskSetManager.get(); @@ -111,7 +111,9 @@ public void testTaskSetPlanning() throws NoSuchTableException, IOException { .option(SparkReadOptions.SCAN_TASK_SET_ID, setID) .option(SparkReadOptions.SPLIT_SIZE, tasks.get(0).file().fileSizeInBytes()) .load(tableName); - Assert.assertEquals("Num partitions should match", 2, scanDF.javaRDD().getNumPartitions()); + assertThat(scanDF.javaRDD().getNumPartitions()) + .as("Num partitions should match") + .isEqualTo(2); // load the staged file set and make sure we combine both files into a
single split scanDF = @@ -121,7 +123,9 @@ public void testTaskSetPlanning() throws NoSuchTableException, IOException { .option(SparkReadOptions.SCAN_TASK_SET_ID, setID) .option(SparkReadOptions.SPLIT_SIZE, Long.MAX_VALUE) .load(tableName); - Assert.assertEquals("Num partitions should match", 1, scanDF.javaRDD().getNumPartitions()); + assertThat(scanDF.javaRDD().getNumPartitions()) + .as("Num partitions should match") + .isEqualTo(1); } } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTable.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTable.java index 616a196872de..ecb720d06e55 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTable.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTable.java @@ -18,34 +18,35 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Map; -import org.apache.iceberg.spark.SparkCatalogTestBase; +import org.apache.iceberg.spark.CatalogTestBase; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.connector.catalog.CatalogManager; import org.apache.spark.sql.connector.catalog.Identifier; import org.apache.spark.sql.connector.catalog.TableCatalog; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; -public class TestSparkTable extends SparkCatalogTestBase { +public class TestSparkTable extends CatalogTestBase { public TestSparkTable(String catalogName, String implementation, Map config) { super(catalogName, implementation, config); } - @Before + @BeforeEach public void createTable() { sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName); } - @After + @AfterEach public void removeTable() { 
sql("DROP TABLE IF EXISTS %s", tableName); } - @Test + @TestTemplate public void testTableEquality() throws NoSuchTableException { CatalogManager catalogManager = spark.sessionState().catalogManager(); TableCatalog catalog = (TableCatalog) catalogManager.catalog(catalogName); @@ -54,7 +55,7 @@ public void testTableEquality() throws NoSuchTableException { SparkTable table2 = (SparkTable) catalog.loadTable(identifier); // different instances pointing to the same table must be equivalent - Assert.assertNotSame("References must be different", table1, table2); - Assert.assertEquals("Tables must be equivalent", table1, table2); + assertThat(table1).as("References must be different").isNotSameAs(table2); + assertThat(table1).as("Tables must be equivalent").isEqualTo(table2); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStreamingOffset.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStreamingOffset.java index 17370aaa22f2..d55e718ff2d3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStreamingOffset.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStreamingOffset.java @@ -18,11 +18,12 @@ */ package org.apache.iceberg.spark.source; +import static org.assertj.core.api.Assertions.assertThat; + import com.fasterxml.jackson.databind.node.ObjectNode; import java.util.Arrays; import org.apache.iceberg.util.JsonUtil; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestStreamingOffset { @@ -35,10 +36,9 @@ public void testJsonConversion() { new StreamingOffset(System.currentTimeMillis(), 3L, false), new StreamingOffset(System.currentTimeMillis(), 4L, true) }; - Assert.assertArrayEquals( - "StreamingOffsets should match", - expected, - Arrays.stream(expected).map(elem -> StreamingOffset.fromJson(elem.json())).toArray()); + assertThat(Arrays.stream(expected).map(elem -> 
StreamingOffset.fromJson(elem.json())).toArray()) + .as("StreamingOffsets should match") + .isEqualTo(expected); } @Test @@ -51,6 +51,6 @@ public void testToJson() throws Exception { actual.put("scan_all_files", false); String expectedJson = expected.json(); String actualJson = JsonUtil.mapper().writeValueAsString(actual); - Assert.assertEquals("Json should match", expectedJson, actualJson); + assertThat(actualJson).isEqualTo(expectedJson); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStructuredStreaming.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStructuredStreaming.java index 464f1f5922b3..3d9d6eb8ebaf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStructuredStreaming.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestStructuredStreaming.java @@ -19,15 +19,17 @@ package org.apache.iceberg.spark.source; import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; +import java.nio.file.Path; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.hadoop.HadoopTables; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.spark.sql.Dataset; @@ -40,13 +42,10 @@ import org.apache.spark.sql.streaming.DataStreamWriter; import org.apache.spark.sql.streaming.StreamingQuery; import org.apache.spark.sql.streaming.StreamingQueryException; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; 
-import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import scala.Option; import scala.collection.JavaConverters; @@ -58,9 +57,9 @@ public class TestStructuredStreaming { optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); private static SparkSession spark = null; - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; - @BeforeClass + @BeforeAll public static void startSpark() { TestStructuredStreaming.spark = SparkSession.builder() @@ -69,7 +68,7 @@ public static void startSpark() { .getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestStructuredStreaming.spark; TestStructuredStreaming.spark = null; @@ -78,7 +77,7 @@ public static void stopSpark() { @Test public void testStreamingWriteAppendMode() throws Exception { - File parent = temp.newFolder("parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test-table"); File checkpoint = new File(parent, "checkpoint"); @@ -117,7 +116,7 @@ public void testStreamingWriteAppendMode() throws Exception { // remove the last commit to force Spark to reprocess batch #1 File lastCommitFile = new File(checkpoint.toString() + "/commits/1"); - Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete()); + assertThat(lastCommitFile.delete()).as("The commit file must be deleted").isTrue(); // restart the query from the checkpoint StreamingQuery restartedQuery = streamWriter.start(); @@ -127,9 +126,10 @@ public void testStreamingWriteAppendMode() throws Exception { Dataset result = spark.read().format("iceberg").load(location.toString()); List actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Number of rows should match", expected.size(), 
actual.size()); - Assert.assertEquals("Result rows should match", expected, actual); - Assert.assertEquals("Number of snapshots should match", 2, Iterables.size(table.snapshots())); + + assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected); + assertThat(actual).as("Result rows should match").isEqualTo(expected); + assertThat(table.snapshots()).as("Number of snapshots should match").hasSize(2); } finally { for (StreamingQuery query : spark.streams().active()) { query.stop(); @@ -139,7 +139,7 @@ public void testStreamingWriteAppendMode() throws Exception { @Test public void testStreamingWriteCompleteMode() throws Exception { - File parent = temp.newFolder("parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test-table"); File checkpoint = new File(parent, "checkpoint"); @@ -177,7 +177,7 @@ public void testStreamingWriteCompleteMode() throws Exception { // remove the last commit to force Spark to reprocess batch #1 File lastCommitFile = new File(checkpoint.toString() + "/commits/1"); - Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete()); + assertThat(lastCommitFile.delete()).as("The commit file must be deleted").isTrue(); // restart the query from the checkpoint StreamingQuery restartedQuery = streamWriter.start(); @@ -187,9 +187,10 @@ public void testStreamingWriteCompleteMode() throws Exception { Dataset result = spark.read().format("iceberg").load(location.toString()); List actual = result.orderBy("data").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Number of rows should match", expected.size(), actual.size()); - Assert.assertEquals("Result rows should match", expected, actual); - Assert.assertEquals("Number of snapshots should match", 2, Iterables.size(table.snapshots())); + + assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected); + assertThat(actual).as("Result rows should match").isEqualTo(expected); + 
assertThat(table.snapshots()).as("Number of snapshots should match").hasSize(2); } finally { for (StreamingQuery query : spark.streams().active()) { query.stop(); @@ -199,7 +200,7 @@ public void testStreamingWriteCompleteMode() throws Exception { @Test public void testStreamingWriteCompleteModeWithProjection() throws Exception { - File parent = temp.newFolder("parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test-table"); File checkpoint = new File(parent, "checkpoint"); @@ -237,7 +238,7 @@ public void testStreamingWriteCompleteModeWithProjection() throws Exception { // remove the last commit to force Spark to reprocess batch #1 File lastCommitFile = new File(checkpoint.toString() + "/commits/1"); - Assert.assertTrue("The commit file must be deleted", lastCommitFile.delete()); + assertThat(lastCommitFile.delete()).as("The commit file must be deleted").isTrue(); // restart the query from the checkpoint StreamingQuery restartedQuery = streamWriter.start(); @@ -247,9 +248,10 @@ public void testStreamingWriteCompleteModeWithProjection() throws Exception { Dataset result = spark.read().format("iceberg").load(location.toString()); List actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList(); - Assert.assertEquals("Number of rows should match", expected.size(), actual.size()); - Assert.assertEquals("Result rows should match", expected, actual); - Assert.assertEquals("Number of snapshots should match", 2, Iterables.size(table.snapshots())); + + assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected); + assertThat(actual).as("Result rows should match").isEqualTo(expected); + assertThat(table.snapshots()).as("Number of snapshots should match").hasSize(2); } finally { for (StreamingQuery query : spark.streams().active()) { query.stop(); @@ -259,7 +261,7 @@ public void testStreamingWriteCompleteModeWithProjection() throws Exception { @Test public void testStreamingWriteUpdateMode() 
throws Exception { - File parent = temp.newFolder("parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test-table"); File checkpoint = new File(parent, "checkpoint"); @@ -283,7 +285,7 @@ public void testStreamingWriteUpdateMode() throws Exception { List batch1 = Lists.newArrayList(1, 2); send(batch1, inputStream); - Assertions.assertThatThrownBy(query::processAllAvailable) + assertThatThrownBy(query::processAllAvailable) .isInstanceOf(StreamingQueryException.class) .hasMessageContaining("does not support Update mode"); } finally { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestWriteMetricsConfig.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestWriteMetricsConfig.java index 1e2a825d8e76..841268a6be0e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestWriteMetricsConfig.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestWriteMetricsConfig.java @@ -21,9 +21,12 @@ import static org.apache.iceberg.spark.SparkSchemaUtil.convert; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -48,13 +51,10 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.InternalRow; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestWriteMetricsConfig { @@ -73,18 +73,18 @@ public class TestWriteMetricsConfig { required(4, "id", Types.IntegerType.get()), required(5, "data", Types.StringType.get())))); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static SparkSession spark = null; private static JavaSparkContext sc = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestWriteMetricsConfig.spark = SparkSession.builder().master("local[2]").getOrCreate(); TestWriteMetricsConfig.sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestWriteMetricsConfig.spark; TestWriteMetricsConfig.spark = null; @@ -94,7 +94,7 @@ public static void stopSpark() { @Test public void testFullMetricsCollectionForParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -116,16 +116,17 @@ public void testFullMetricsCollectionForParquet() throws IOException { for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) { DataFile file = task.file(); - Assert.assertEquals(2, file.nullValueCounts().size()); - Assert.assertEquals(2, file.valueCounts().size()); - Assert.assertEquals(2, file.lowerBounds().size()); - Assert.assertEquals(2, file.upperBounds().size()); + + assertThat(file.nullValueCounts()).hasSize(2); + assertThat(file.valueCounts()).hasSize(2); + assertThat(file.lowerBounds()).hasSize(2); + assertThat(file.upperBounds()).hasSize(2); } } @Test public void testCountMetricsCollectionForParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = 
temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -147,16 +148,16 @@ public void testCountMetricsCollectionForParquet() throws IOException { for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) { DataFile file = task.file(); - Assert.assertEquals(2, file.nullValueCounts().size()); - Assert.assertEquals(2, file.valueCounts().size()); - Assert.assertTrue(file.lowerBounds().isEmpty()); - Assert.assertTrue(file.upperBounds().isEmpty()); + assertThat(file.nullValueCounts()).hasSize(2); + assertThat(file.valueCounts()).hasSize(2); + assertThat(file.lowerBounds()).isEmpty(); + assertThat(file.upperBounds()).isEmpty(); } } @Test public void testNoMetricsCollectionForParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -178,16 +179,16 @@ public void testNoMetricsCollectionForParquet() throws IOException { for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) { DataFile file = task.file(); - Assert.assertTrue(file.nullValueCounts().isEmpty()); - Assert.assertTrue(file.valueCounts().isEmpty()); - Assert.assertTrue(file.lowerBounds().isEmpty()); - Assert.assertTrue(file.upperBounds().isEmpty()); + assertThat(file.nullValueCounts()).isEmpty(); + assertThat(file.valueCounts()).isEmpty(); + assertThat(file.lowerBounds()).isEmpty(); + assertThat(file.upperBounds()).isEmpty(); } } @Test public void testCustomMetricCollectionForParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -212,18 +213,16 @@ 
public void testCustomMetricCollectionForParquet() throws IOException { Types.NestedField id = schema.findField("id"); for (FileScanTask task : table.newScan().includeColumnStats().planFiles()) { DataFile file = task.file(); - Assert.assertEquals(2, file.nullValueCounts().size()); - Assert.assertEquals(2, file.valueCounts().size()); - Assert.assertEquals(1, file.lowerBounds().size()); - Assert.assertTrue(file.lowerBounds().containsKey(id.fieldId())); - Assert.assertEquals(1, file.upperBounds().size()); - Assert.assertTrue(file.upperBounds().containsKey(id.fieldId())); + assertThat(file.nullValueCounts()).hasSize(2); + assertThat(file.valueCounts()).hasSize(2); + assertThat(file.lowerBounds()).hasSize(1).containsKey(id.fieldId()); + assertThat(file.upperBounds()).hasSize(1).containsKey(id.fieldId()); } } @Test public void testBadCustomMetricCollectionForParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.unpartitioned(); @@ -231,8 +230,7 @@ public void testBadCustomMetricCollectionForParquet() throws IOException { properties.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "counts"); properties.put("write.metadata.metrics.column.ids", "full"); - Assertions.assertThatThrownBy( - () -> tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation)) + assertThatThrownBy(() -> tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith( "Invalid metrics config, could not find column ids from table prop write.metadata.metrics.column.ids in schema table"); @@ -240,7 +238,7 @@ public void testBadCustomMetricCollectionForParquet() throws IOException { @Test public void testCustomMetricCollectionForNestedParquet() throws IOException { - String tableLocation = temp.newFolder("iceberg-table").toString(); + 
String tableLocation = temp.resolve("iceberg-table").toFile().toString(); HadoopTables tables = new HadoopTables(CONF); PartitionSpec spec = PartitionSpec.builderFor(COMPLEX_SCHEMA).identity("strCol").build(); @@ -271,28 +269,30 @@ public void testCustomMetricCollectionForNestedParquet() throws IOException { DataFile file = task.file(); Map nullValueCounts = file.nullValueCounts(); - Assert.assertEquals(3, nullValueCounts.size()); - Assert.assertTrue(nullValueCounts.containsKey(longCol.fieldId())); - Assert.assertTrue(nullValueCounts.containsKey(recordId.fieldId())); - Assert.assertTrue(nullValueCounts.containsKey(recordData.fieldId())); + assertThat(nullValueCounts) + .hasSize(3) + .containsKey(longCol.fieldId()) + .containsKey(recordId.fieldId()) + .containsKey(recordData.fieldId()); Map valueCounts = file.valueCounts(); - Assert.assertEquals(3, valueCounts.size()); - Assert.assertTrue(valueCounts.containsKey(longCol.fieldId())); - Assert.assertTrue(valueCounts.containsKey(recordId.fieldId())); - Assert.assertTrue(valueCounts.containsKey(recordData.fieldId())); + assertThat(valueCounts) + .hasSize(3) + .containsKey(longCol.fieldId()) + .containsKey(recordId.fieldId()) + .containsKey(recordData.fieldId()); Map lowerBounds = file.lowerBounds(); - Assert.assertEquals(2, lowerBounds.size()); - Assert.assertTrue(lowerBounds.containsKey(recordId.fieldId())); + assertThat(lowerBounds).hasSize(2).containsKey(recordId.fieldId()); + ByteBuffer recordDataLowerBound = lowerBounds.get(recordData.fieldId()); - Assert.assertEquals(2, ByteBuffers.toByteArray(recordDataLowerBound).length); + assertThat(ByteBuffers.toByteArray(recordDataLowerBound)).hasSize(2); Map upperBounds = file.upperBounds(); - Assert.assertEquals(2, upperBounds.size()); - Assert.assertTrue(upperBounds.containsKey(recordId.fieldId())); + assertThat(upperBounds).hasSize(2).containsKey(recordId.fieldId()); + ByteBuffer recordDataUpperBound = upperBounds.get(recordData.fieldId()); - Assert.assertEquals(2, 
ByteBuffers.toByteArray(recordDataUpperBound).length); + assertThat(ByteBuffers.toByteArray(recordDataUpperBound)).hasSize(2); } } }