From f7b3814325ef395b2eca902c4758669c987dd338 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Fri, 23 Aug 2024 20:12:30 +0800 Subject: [PATCH] Default values and benchmark enhancement --- .../apache/iceberg/PartitionStatsUtil.java | 13 ++ .../data/TestPartitionStatsGenerator.java | 152 +++++++++--------- 2 files changed, 89 insertions(+), 76 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java b/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java index ef8fa35f5419..8c21bccf6b18 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java +++ b/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java @@ -157,20 +157,31 @@ private static Record fromManifestEntry( record.set(Column.DATA_RECORD_COUNT.ordinal(), entry.file().recordCount()); record.set(Column.DATA_FILE_COUNT.ordinal(), 1); record.set(Column.TOTAL_DATA_FILE_SIZE_IN_BYTES.ordinal(), entry.file().fileSizeInBytes()); + // default values + record.set(Column.POSITION_DELETE_RECORD_COUNT.ordinal(), 0L); + record.set(Column.POSITION_DELETE_FILE_COUNT.ordinal(), 0); + record.set(Column.EQUALITY_DELETE_RECORD_COUNT.ordinal(), 0L); + record.set(Column.EQUALITY_DELETE_FILE_COUNT.ordinal(), 0); break; case POSITION_DELETES: record.set(Column.POSITION_DELETE_RECORD_COUNT.ordinal(), entry.file().recordCount()); record.set(Column.POSITION_DELETE_FILE_COUNT.ordinal(), 1); + // default values record.set(Column.DATA_RECORD_COUNT.ordinal(), 0L); record.set(Column.DATA_FILE_COUNT.ordinal(), 0); record.set(Column.TOTAL_DATA_FILE_SIZE_IN_BYTES.ordinal(), 0L); + record.set(Column.EQUALITY_DELETE_RECORD_COUNT.ordinal(), 0L); + record.set(Column.EQUALITY_DELETE_FILE_COUNT.ordinal(), 0); break; case EQUALITY_DELETES: record.set(Column.EQUALITY_DELETE_RECORD_COUNT.ordinal(), entry.file().recordCount()); record.set(Column.EQUALITY_DELETE_FILE_COUNT.ordinal(), 1); + // default values record.set(Column.DATA_RECORD_COUNT.ordinal(), 0L); record.set(Column.DATA_FILE_COUNT.ordinal(), 0); record.set(Column.TOTAL_DATA_FILE_SIZE_IN_BYTES.ordinal(), 0L); + record.set(Column.POSITION_DELETE_RECORD_COUNT.ordinal(), 0L); + record.set(Column.POSITION_DELETE_FILE_COUNT.ordinal(), 0); break; default: throw new UnsupportedOperationException( @@ -178,6 +189,8 @@ private static Record fromManifestEntry( } // Note: Not computing the `TOTAL_RECORD_COUNT` for now as it needs scanning the data. + record.set(Column.TOTAL_RECORD_COUNT.ordinal(), 0L); + return record; } diff --git a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsGenerator.java b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsGenerator.java index b06f34ac6ca3..1f2e2448db4c 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsGenerator.java +++ b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsGenerator.java @@ -182,11 +182,11 @@ public void testPartitionStats() throws Exception { 9L, 3, 3 * dataFile1.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -195,11 +195,11 @@ public void testPartitionStats() throws Exception { 3L, 3, 3 * dataFile2.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -208,11 +208,11 @@ public void testPartitionStats() throws Exception { 3L, 3, 3 * dataFile3.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -221,11 +221,11 @@ public void testPartitionStats() throws Exception { 6L, 3, 3 * dataFile4.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId())); @@ -247,11 +247,11 @@ public void testPartitionStats() throws Exception { 9L, 3, 3 * dataFile1.fileSizeInBytes(), - null, - null, + 0L, + 0, eqDeletes.recordCount(), 1, - null, + 0L, snapshot3.timestampMillis(), snapshot3.snapshotId()), Tuple.tuple( @@ -260,11 +260,11 @@ public void testPartitionStats() throws Exception { 3L, 3, 3 * dataFile2.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -275,9 +275,9 @@ public void testPartitionStats() throws Exception { 3 * dataFile3.fileSizeInBytes(), posDeletes.recordCount(), 1, - null, - null, - null, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId()), Tuple.tuple( @@ -286,11 +286,11 @@ public void testPartitionStats() throws Exception { 6L, 3, 3 * dataFile4.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId())); } @@ -336,11 +336,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 8L, 2, 2 * dataFile1.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -349,11 +349,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 6L, 2, 2 * dataFile2.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId())); @@ -402,11 +402,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 8L, 2, 2 * dataFile1.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot1.timestampMillis(), snapshot1.snapshotId()), Tuple.tuple( @@ -415,11 +415,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 7L, 3, 2 * dataFile2.fileSizeInBytes() + dataFile7.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId()), Tuple.tuple( @@ -428,11 +428,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 3L, 1, dataFile3.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId()), Tuple.tuple( @@ -441,11 +441,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 1L, 1, dataFile4.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId()), Tuple.tuple( @@ -454,11 +454,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 1L, 1, dataFile5.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId()), Tuple.tuple( @@ -467,11 +467,11 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception { 2L, 1, dataFile6.fileSizeInBytes(), - null, - null, - null, - null, - null, + 0L, + 0, + 0L, + 0, + 0L, snapshot2.timestampMillis(), snapshot2.snapshotId())); }