diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 5ac91ec0a96f..332b6234d48f 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -774,6 +774,11 @@ acceptedBreaks:
     - code: "java.method.removed"
      old: "method org.apache.iceberg.view.ViewBuilder org.apache.iceberg.view.ViewBuilder::withQueryColumnNames(java.util.List)"
      justification: "Acceptable break due to updating View APIs and the View Spec"
+    org.apache.iceberg:iceberg-core:
+    - code: "java.field.constantValueChanged"
+      old: "field org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_DEFAULT"
+      new: "field org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_DEFAULT"
+      justification: "Changing the default compression codec from gzip to zstd"
   apache-iceberg-0.14.0:
     org.apache.iceberg:iceberg-api:
     - code: "java.class.defaultSerializationChanged"
diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java
index b14354def6ac..23512a486ef8 100644
--- a/core/src/main/java/org/apache/iceberg/TableProperties.java
+++ b/core/src/main/java/org/apache/iceberg/TableProperties.java
@@ -142,7 +142,7 @@ private TableProperties() {}
   public static final String PARQUET_COMPRESSION = "write.parquet.compression-codec";
   public static final String DELETE_PARQUET_COMPRESSION =
       "write.delete.parquet.compression-codec";
-  public static final String PARQUET_COMPRESSION_DEFAULT = "gzip";
+  public static final String PARQUET_COMPRESSION_DEFAULT = "zstd";
 
   public static final String PARQUET_COMPRESSION_LEVEL = "write.parquet.compression-level";
   public static final String DELETE_PARQUET_COMPRESSION_LEVEL =
diff --git a/docs/configuration.md b/docs/configuration.md
index 7fa2d94adf91..60e026760f6a 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -55,7 +55,7 @@ Iceberg tables support table properties to configure table behavior, like the de
 | write.parquet.page-size-bytes | 1048576 (1 MB) | Parquet page size |
 | write.parquet.page-row-limit | 20000 | Parquet page row limit |
 | write.parquet.dict-size-bytes | 2097152 (2 MB) | Parquet dictionary page size |
-| write.parquet.compression-codec | gzip | Parquet compression codec: zstd, brotli, lz4, gzip, snappy, uncompressed |
+| write.parquet.compression-codec | zstd | Parquet compression codec: zstd, brotli, lz4, gzip, snappy, uncompressed |
 | write.parquet.compression-level | null | Parquet compression level |
 | write.parquet.bloom-filter-enabled.column.col1 | (not set) | Hint to parquet to write a bloom filter for the column: col1 |
 | write.parquet.bloom-filter-max-bytes | 1048576 (1 MB) | The maximum number of bytes for a bloom filter bitset |
diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
index cb6fda18a1ee..f05bf2fcd9e5 100644
--- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
+++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
@@ -219,27 +219,27 @@ public void testPrimitiveColumns() throws Exception {
 
     Row binaryCol =
         Row.of(
-            59L,
+            52L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row booleanCol = Row.of(44L, 4L, 0L, null, false, true);
-    Row decimalCol = Row.of(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
-    Row doubleCol = Row.of(99L, 4L, 0L, 1L, 1.0D, 2.0D);
+    Row booleanCol = Row.of(32L, 4L, 0L, null, false, true);
+    Row decimalCol = Row.of(85L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
+    Row doubleCol = Row.of(85L, 4L, 0L, 1L, 1.0D, 2.0D);
     Row fixedCol =
         Row.of(
-            55L,
+            44L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row floatCol = Row.of(90L, 4L, 0L, 2L, 0f, 0f);
-    Row intCol = Row.of(91L, 4L, 0L, null, 1, 2);
-    Row longCol = Row.of(91L, 4L, 0L, null, 1L, 2L);
-    Row stringCol = Row.of(99L, 4L, 0L, null, "1", "2");
+    Row floatCol = Row.of(71L, 4L, 0L, 2L, 0f, 0f);
+    Row intCol = Row.of(71L, 4L, 0L, null, 1, 2);
+    Row longCol = Row.of(79L, 4L, 0L, null, 1L, 2L);
+    Row stringCol = Row.of(79L, 4L, 0L, null, "1", "2");
 
     List<Row> expected =
         Lists.newArrayList(
@@ -291,7 +291,7 @@ public void testSelectNestedValues() throws Exception {
   public void testNestedValues() throws Exception {
     createNestedTable();
 
-    Row leafDoubleCol = Row.of(53L, 3L, 1L, 1L, 0.0D, 0.0D);
+    Row leafDoubleCol = Row.of(46L, 3L, 1L, 1L, 0.0D, 0.0D);
     Row leafLongCol = Row.of(54L, 3L, 1L, null, 0L, 1L);
     Row metrics = Row.of(Row.of(leafDoubleCol, leafLongCol));
 
diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
index cb6fda18a1ee..f05bf2fcd9e5 100644
--- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
+++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
@@ -219,27 +219,27 @@ public void testPrimitiveColumns() throws Exception {
 
     Row binaryCol =
         Row.of(
-            59L,
+            52L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row booleanCol = Row.of(44L, 4L, 0L, null, false, true);
-    Row decimalCol = Row.of(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
-    Row doubleCol = Row.of(99L, 4L, 0L, 1L, 1.0D, 2.0D);
+    Row booleanCol = Row.of(32L, 4L, 0L, null, false, true);
+    Row decimalCol = Row.of(85L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
+    Row doubleCol = Row.of(85L, 4L, 0L, 1L, 1.0D, 2.0D);
     Row fixedCol =
         Row.of(
-            55L,
+            44L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row floatCol = Row.of(90L, 4L, 0L, 2L, 0f, 0f);
-    Row intCol = Row.of(91L, 4L, 0L, null, 1, 2);
-    Row longCol = Row.of(91L, 4L, 0L, null, 1L, 2L);
-    Row stringCol = Row.of(99L, 4L, 0L, null, "1", "2");
+    Row floatCol = Row.of(71L, 4L, 0L, 2L, 0f, 0f);
+    Row intCol = Row.of(71L, 4L, 0L, null, 1, 2);
+    Row longCol = Row.of(79L, 4L, 0L, null, 1L, 2L);
+    Row stringCol = Row.of(79L, 4L, 0L, null, "1", "2");
 
     List<Row> expected =
         Lists.newArrayList(
@@ -291,7 +291,7 @@ public void testSelectNestedValues() throws Exception {
   public void testNestedValues() throws Exception {
     createNestedTable();
 
-    Row leafDoubleCol = Row.of(53L, 3L, 1L, 1L, 0.0D, 0.0D);
+    Row leafDoubleCol = Row.of(46L, 3L, 1L, 1L, 0.0D, 0.0D);
     Row leafLongCol = Row.of(54L, 3L, 1L, null, 0L, 1L);
     Row metrics = Row.of(Row.of(leafDoubleCol, leafLongCol));
 
diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
index cb6fda18a1ee..f05bf2fcd9e5 100644
--- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
+++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/source/TestMetadataTableReadableMetrics.java
@@ -219,27 +219,27 @@ public void testPrimitiveColumns() throws Exception {
 
     Row binaryCol =
         Row.of(
-            59L,
+            52L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row booleanCol = Row.of(44L, 4L, 0L, null, false, true);
-    Row decimalCol = Row.of(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
-    Row doubleCol = Row.of(99L, 4L, 0L, 1L, 1.0D, 2.0D);
+    Row booleanCol = Row.of(32L, 4L, 0L, null, false, true);
+    Row decimalCol = Row.of(85L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
+    Row doubleCol = Row.of(85L, 4L, 0L, 1L, 1.0D, 2.0D);
     Row fixedCol =
         Row.of(
-            55L,
+            44L,
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Row floatCol = Row.of(90L, 4L, 0L, 2L, 0f, 0f);
-    Row intCol = Row.of(91L, 4L, 0L, null, 1, 2);
-    Row longCol = Row.of(91L, 4L, 0L, null, 1L, 2L);
-    Row stringCol = Row.of(99L, 4L, 0L, null, "1", "2");
+    Row floatCol = Row.of(71L, 4L, 0L, 2L, 0f, 0f);
+    Row intCol = Row.of(71L, 4L, 0L, null, 1, 2);
+    Row longCol = Row.of(79L, 4L, 0L, null, 1L, 2L);
+    Row stringCol = Row.of(79L, 4L, 0L, null, "1", "2");
 
     List<Row> expected =
         Lists.newArrayList(
@@ -291,7 +291,7 @@ public void testSelectNestedValues() throws Exception {
   public void testNestedValues() throws Exception {
     createNestedTable();
 
-    Row leafDoubleCol = Row.of(53L, 3L, 1L, 1L, 0.0D, 0.0D);
+    Row leafDoubleCol = Row.of(46L, 3L, 1L, 1L, 0.0D, 0.0D);
     Row leafLongCol = Row.of(54L, 3L, 1L, null, 0L, 1L);
     Row metrics = Row.of(Row.of(leafDoubleCol, leafLongCol));
 
diff --git a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
index 416d5eed5b65..5285158ec99d 100644
--- a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
+++ b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
@@ -326,7 +326,7 @@ public void testSelectNestedValues() throws Exception {
   public void testNestedValues() throws Exception {
     createNestedTable();
 
-    Object[] leafDoubleCol = row(53L, 3L, 1L, 1L, 0.0D, 0.0D);
+    Object[] leafDoubleCol = row(46L, 3L, 1L, 1L, 0.0D, 0.0D);
     Object[] leafLongCol = row(54L, 3L, 1L, null, 0L, 1L);
     Object[] metrics = row(leafDoubleCol, leafLongCol);
 
diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
index f65da4574284..9fb754776818 100644
--- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
+++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
@@ -353,7 +353,7 @@ public void testSelectNestedValues() throws Exception {
   public void testNestedValues() throws Exception {
     createNestedTable();
 
-    Object[] leafDoubleCol = row(53L, 3L, 1L, 1L, 0.0D, 0.0D);
+    Object[] leafDoubleCol = row(46L, 3L, 1L, 1L, 0.0D, 0.0D);
     Object[] leafLongCol = row(54L, 3L, 1L, null, 0L, 1L);
     Object[] metrics = row(leafDoubleCol, leafLongCol);
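Note: tables that do not set `write.parquet.compression-codec` explicitly will pick up the new zstd default from `PARQUET_COMPRESSION_DEFAULT` on subsequent Parquet writes. For readers who want to pin the codec per table rather than rely on the library default, a minimal sketch using the existing `Table.updateProperties()` API is below; the class and method names (`CompressionCodecExample`, `keepGzip`, `useZstd`) are illustrative, and `table` is assumed to be an already-loaded `org.apache.iceberg.Table`, not part of this patch.

```java
// Illustrative sketch (not part of this diff): pin the Parquet compression codec
// on a specific table so its writes do not depend on the library-wide default.
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;

public class CompressionCodecExample {

  // Keep the previous gzip behavior for this table.
  public static void keepGzip(Table table) {
    table
        .updateProperties()
        .set(TableProperties.PARQUET_COMPRESSION, "gzip") // write.parquet.compression-codec
        .commit();
  }

  // Or opt in to zstd explicitly (same as the new default) and tune the level.
  public static void useZstd(Table table) {
    table
        .updateProperties()
        .set(TableProperties.PARQUET_COMPRESSION, "zstd")
        .set(TableProperties.PARQUET_COMPRESSION_LEVEL, "3") // write.parquet.compression-level
        .commit();
  }
}
```

The same property can also be set through engine DDL (for example Spark's `ALTER TABLE ... SET TBLPROPERTIES`), which is equivalent to the API call above.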